From ab725de6a1101ef43b2a4b9a0b8778a84c76e94c Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:26:30 -0500 Subject: [PATCH] fix: add supplemental license URL mappings that do not require an SPDX upstream update Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/spdxlicense/license.go | 19 +++++++---- internal/spdxlicense/license_url_test.go | 34 +++++++++++++++++++ .../spdxlicense/supplemental_license_urls.go | 24 +++++++++++++ syft/pkg/cataloger/java/archive_parser.go | 7 +--- 4 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 internal/spdxlicense/supplemental_license_urls.go diff --git a/internal/spdxlicense/license.go b/internal/spdxlicense/license.go index 3678da845..fe47fcfbc 100644 --- a/internal/spdxlicense/license.go +++ b/internal/spdxlicense/license.go @@ -41,14 +41,21 @@ type LicenseInfo struct { ID string } -// LicenseByURL returns the license ID and name for a given URL from the SPDX license list -// The URL should match one of the URLs in the seeAlso field of an SPDX license +// LicenseByURL returns the license ID for a given URL. +// It first checks supplemental mappings (user-contributed URLs not in the official +// SPDX list), then the auto-generated SPDX license list mappings. 
func LicenseByURL(url string) (LicenseInfo, bool) { url = strings.TrimSpace(url) - if id, exists := urlToLicense[url]; exists { - return LicenseInfo{ - ID: id, - }, true + + // Check supplemental mappings first (user-contributed URLs) + if id, exists := supplementalURLToLicense[url]; exists { + return LicenseInfo{ID: id}, true } + + // Fall back to auto-generated SPDX mappings + if id, exists := urlToLicense[url]; exists { + return LicenseInfo{ID: id}, true + } + return LicenseInfo{}, false } diff --git a/internal/spdxlicense/license_url_test.go b/internal/spdxlicense/license_url_test.go index d4c8e92f7..b444b122f 100644 --- a/internal/spdxlicense/license_url_test.go +++ b/internal/spdxlicense/license_url_test.go @@ -88,6 +88,40 @@ func TestLicenseByURL(t *testing.T) { } } +func TestLicenseByURL_SupplementalURLs(t *testing.T) { + // Test that supplemental URLs (not in the official SPDX list) are resolved correctly + // These URLs are defined in supplemental_license_urls.go + + tests := []struct { + name string + url string + wantID string + }{ + { + name: "LGPL-2.1 http variant (supplemental)", + url: "http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html", + wantID: "LGPL-2.1-only", + }, + { + name: "EDL/BSD-3-Clause http variant (supplemental)", + url: "http://www.eclipse.org/org/documents/edl-v10.php", + wantID: "BSD-3-Clause", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + info, found := LicenseByURL(tt.url) + if !found { + t.Fatalf("LicenseByURL(%q) not found, expected %s", tt.url, tt.wantID) + } + if info.ID != tt.wantID { + t.Errorf("LicenseByURL(%q) = %s, want %s", tt.url, info.ID, tt.wantID) + } + }) + } +} + func TestLicenseByURL_DeprecatedLicenses(t *testing.T) { // Test that deprecated license URLs map to their replacement licenses // For example, GPL-2.0+ should map to GPL-2.0-or-later diff --git a/internal/spdxlicense/supplemental_license_urls.go b/internal/spdxlicense/supplemental_license_urls.go new file mode 100644 
index 000000000..c5e37ea82 --- /dev/null +++ b/internal/spdxlicense/supplemental_license_urls.go @@ -0,0 +1,24 @@ +package spdxlicense + +// supplementalURLToLicense contains URL-to-SPDX-ID mappings that are not in the +// official SPDX license list but are commonly found in real-world packages. +// +// These mappings supplement the auto-generated urlToLicense map from license_list.go. +// Add new entries here when you encounter license URLs that should map to SPDX IDs +// but aren't covered by the official SPDX seeAlso URLs. +// +// Guidelines for adding entries: +// - Verify the URL actually corresponds to the SPDX license +// - Prefer adding to SPDX upstream if the URL is canonical (https://github.com/spdx/license-list-XML) +// - Use this map for common variants (http vs https, alternate paths) that SPDX won't accept +var supplementalURLToLicense = map[string]string{ + // LGPL-2.1: Common http:// variant of the old-licenses path + // SPDX has https://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html + // but many Java packages use this simpler http:// URL + "http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html": "LGPL-2.1-only", + + // BSD-3-Clause (EDL): http:// variant of Eclipse Distribution License + // SPDX has https://www.eclipse.org/org/documents/edl-v10.php + // but many Java packages use http:// instead of https:// + "http://www.eclipse.org/org/documents/edl-v10.php": "BSD-3-Clause", +} diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index ffe731d89..9d051d69b 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -25,7 +25,6 @@ import ( "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" - "github.com/anchore/syft/internal/spdxlicense" ) var archiveFormatGlobs = []string{ @@ -376,11 +375,7 @@ func toPkgLicenses(ctx context.Context, location 
*file.Location, licenses []mave if name == "" && url == "" { continue } - if licInfo, ok := spdxlicense.LicenseByURL(url); ok { - if name == "" { - name = licInfo.ID // use detected license ID if no name given - } - } + // NewLicenseFromFieldsWithContext handles URL-to-SPDX-ID lookup internally out = append(out, pkg.NewLicenseFromFieldsWithContext(ctx, name, url, location)) } return out