From 6ba087c72c411a8b7e405c1de1ce044162225e07 Mon Sep 17 00:00:00 2001
From: Christopher Angelo Phillips <32073428+spiffcs@users.noreply.github.com>
Date: Fri, 2 May 2025 09:34:08 -0400
Subject: [PATCH] fix: Do not use hashes for SPDX license names/expressions (#3844)

---------

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
---
 .../internal/spdxutil/helpers/license.go      |  2 +-
 .../internal/spdxutil/helpers/license_test.go | 52 +++++++++++++++++++
 .../internal/spdxutil/helpers/spdxid.go       |  1 +
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/syft/format/internal/spdxutil/helpers/license.go b/syft/format/internal/spdxutil/helpers/license.go
index 877c2a81e..ae19dfb1d 100644
--- a/syft/format/internal/spdxutil/helpers/license.go
+++ b/syft/format/internal/spdxutil/helpers/license.go
@@ -94,7 +94,7 @@ func generateLicenseID(l pkg.License) string {
 		return l.SPDXExpression
 	}
 	if l.Value != "" {
-		return licenseSum(l.Value)
+		return spdxlicense.LicenseRefPrefix + SanitizeElementID(l.Value)
 	}
 	return licenseSum(l.FullText)
 }
diff --git a/syft/format/internal/spdxutil/helpers/license_test.go b/syft/format/internal/spdxutil/helpers/license_test.go
index b89b5e853..81f0359f6 100644
--- a/syft/format/internal/spdxutil/helpers/license_test.go
+++ b/syft/format/internal/spdxutil/helpers/license_test.go
@@ -105,6 +105,58 @@ func Test_License(t *testing.T) {
 	}
 }
 
+func TestGenerateLicenseID(t *testing.T) {
+	tests := []struct {
+		name     string
+		license  pkg.License
+		expected string
+	}{
+		{
+			name: "SPDX expression is preferred",
+			license: pkg.License{
+				SPDXExpression: "Apache-2.0",
+				Value:          "SomeValue",
+				FullText:       "Some text",
+			},
+			expected: "Apache-2.0",
+		},
+		{
+			name: "Uses value if no SPDX expression",
+			license: pkg.License{
+				Value: "MIT",
+			},
+			expected: spdxlicense.LicenseRefPrefix + "MIT",
+		},
+		{
+			name: "Long value is sanitized correctly",
+			license: pkg.License{
+				Value: "LGPLv2+ and LGPLv2+ with exceptions and GPLv2+ and GPLv2+ with exceptions and BSD and Inner-Net and ISC and Public Domain and GFDL",
+			},
+			expected: spdxlicense.LicenseRefPrefix +
+				"LGPLv2--and-LGPLv2--with-exceptions-and-GPLv2--and-GPLv2--with-exceptions-and-BSD-and-Inner-Net-and-ISC-and-Public-Domain-and-GFDL",
+		},
+		{
+			name: "Uses hash of fullText when nothing else is provided",
+			license: pkg.License{
+				FullText: "This is a very long custom license text that should be hashed because it's more than 64 characters long.",
+			},
+			expected: "", // We'll verify it starts with the correct prefix
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			id := generateLicenseID(tt.license)
+			if tt.expected == "" {
+				assert.True(t, len(id) > len(spdxlicense.LicenseRefPrefix))
+				assert.Contains(t, id, spdxlicense.LicenseRefPrefix)
+			} else {
+				assert.Equal(t, tt.expected, id)
+			}
+		})
+	}
+}
+
 func Test_joinLicenses(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/syft/format/internal/spdxutil/helpers/spdxid.go b/syft/format/internal/spdxutil/helpers/spdxid.go
index d7477190d..e476b9198 100644
--- a/syft/format/internal/spdxutil/helpers/spdxid.go
+++ b/syft/format/internal/spdxutil/helpers/spdxid.go
@@ -8,6 +8,7 @@ var expr = regexp.MustCompile("[^a-zA-Z0-9.-]")
 
 // SPDX spec says SPDXID must be:
 // "SPDXRef-"[idstring] where [idstring] is a unique string containing letters, numbers, ., and/or -
+// https://spdx.github.io/spdx-spec/v2.3/snippet-information/
 func SanitizeElementID(id string) string {
 	return expr.ReplaceAllString(id, "-")
 }