diff --git a/internal/cmptest/location.go b/internal/cmptest/location.go index aa20b9cb0..1959926f0 100644 --- a/internal/cmptest/location.go +++ b/internal/cmptest/location.go @@ -2,6 +2,7 @@ package cmptest import ( "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/anchore/syft/syft/file" ) @@ -9,7 +10,7 @@ import ( type LocationComparer func(x, y file.Location) bool func DefaultLocationComparer(x, y file.Location) bool { - return cmp.Equal(x.Coordinates, y.Coordinates) && cmp.Equal(x.AccessPath, y.AccessPath) + return cmp.Equal(x.Coordinates, y.Coordinates, cmpopts.IgnoreUnexported(file.Coordinates{})) && cmp.Equal(x.AccessPath, y.AccessPath) } func LocationComparerWithoutLayer(x, y file.Location) bool { diff --git a/syft/format/common/spdxhelpers/to_format_model.go b/syft/format/common/spdxhelpers/to_format_model.go index 45b27caac..eca71d866 100644 --- a/syft/format/common/spdxhelpers/to_format_model.go +++ b/syft/format/common/spdxhelpers/to_format_model.go @@ -268,21 +268,34 @@ func toRootPackage(s source.Description) *spdx.Package { } func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID { + id := string(identifiable.ID()) + if strings.HasPrefix(id, "SPDXRef-") { + // this is already an SPDX ID, no need to change it (except for the prefix) + return spdx.ElementID(helpers.SanitizeElementID(strings.TrimPrefix(id, "SPDXRef-"))) + } maxLen := 40 - id := "" switch it := identifiable.(type) { case pkg.Package: + if strings.HasPrefix(id, "Package-") { + // this is already an SPDX ID, no need to change it + return spdx.ElementID(helpers.SanitizeElementID(id)) + } switch { case it.Type != "" && it.Name != "": - id = fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, it.ID()) + id = fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, id) case it.Name != "": - id = fmt.Sprintf("Package-%s-%s", it.Name, it.ID()) + id = fmt.Sprintf("Package-%s-%s", it.Name, id) case it.Type != "": - id = fmt.Sprintf("Package-%s-%s", it.Type, it.ID()) + id = fmt.Sprintf("Package-%s-%s", it.Type, id) default: - id = fmt.Sprintf("Package-%s", it.ID()) + id = fmt.Sprintf("Package-%s", id) } case file.Coordinates: + if strings.HasPrefix(id, "File-") { + // this is already an SPDX ID, no need to change it. Note: there is no way to reach this case today + // from within syft, however, this covers future cases where the ID can be overridden + return spdx.ElementID(helpers.SanitizeElementID(id)) + } p := "" parts := strings.Split(it.RealPath, "/") for i := len(parts); i > 0; i-- { @@ -296,9 +309,7 @@ func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID { } p = path.Join(part, p) } - id = fmt.Sprintf("File-%s-%s", p, it.ID()) - default: - id = string(identifiable.ID()) + id = fmt.Sprintf("File-%s-%s", p, id) } // NOTE: the spdx library prepend SPDXRef-, so we don't do it here return spdx.ElementID(helpers.SanitizeElementID(id)) diff --git a/syft/format/common/spdxhelpers/to_format_model_test.go b/syft/format/common/spdxhelpers/to_format_model_test.go index 13eb46ee3..67bc2885e 100644 --- a/syft/format/common/spdxhelpers/to_format_model_test.go +++ b/syft/format/common/spdxhelpers/to_format_model_test.go @@ -861,6 +861,34 @@ func Test_toSPDXID(t *testing.T) { }, expected: "Package-npm-some-package", }, + { + name: "package with existing SPDX ID", + it: func() pkg.Package { + p := pkg.Package{ + Type: pkg.NpmPkg, + Name: "some-package", + } + // SPDXRef- prefix is removed on decode (when everything is working as it should) + p.OverrideID("Package-npm-some-package-extra!") + return p + }(), + // note: we still sanitize out the "!" which is not allowed in SPDX IDs + expected: "Package-npm-some-package-extra", + }, + { + name: "package with existing SPDX Ref", + it: func() pkg.Package { + p := pkg.Package{ + Type: pkg.NpmPkg, + Name: "some-package", + } + // someone incorrectly added SPDXRef- prefix + p.OverrideID("SPDXRef-Package-npm-some-package-extra!") + return p + }(), + // note: we still sanitize out the "!" which is not allowed in SPDX IDs + expected: "Package-npm-some-package-extra", + }, } for _, test := range tests { diff --git a/syft/format/common/spdxhelpers/to_syft_model.go b/syft/format/common/spdxhelpers/to_syft_model.go index d1db08c3d..5037e5b11 100644 --- a/syft/format/common/spdxhelpers/to_syft_model.go +++ b/syft/format/common/spdxhelpers/to_syft_model.go @@ -509,7 +509,12 @@ func toSyftPackage(p *spdx.Package) pkg.Package { Metadata: extractMetadata(p, info), } - sP.SetID() + if p.PackageSPDXIdentifier != "" { + // always prefer the IDs from the SBOM over derived IDs + sP.OverrideID(artifact.ID(p.PackageSPDXIdentifier)) + } else { + sP.SetID() + } return *sP } diff --git a/syft/format/common/spdxhelpers/to_syft_model_test.go b/syft/format/common/spdxhelpers/to_syft_model_test.go index e8526bab5..574675902 100644 --- a/syft/format/common/spdxhelpers/to_syft_model_test.go +++ b/syft/format/common/spdxhelpers/to_syft_model_test.go @@ -664,7 +664,7 @@ func Test_directPackageFiles(t *testing.T) { Packages: []*spdx.Package{ { PackageName: "some-package", - PackageSPDXIdentifier: "1", + PackageSPDXIdentifier: "1", // important! PackageVersion: "1.0.5", Files: []*spdx.File{ { @@ -689,7 +689,7 @@ func Test_directPackageFiles(t *testing.T) { Name: "some-package", Version: "1.0.5", } - p.SetID() + p.OverrideID("1") // the same as the spdxID on the package element f := file.Location{ LocationData: file.LocationData{ Coordinates: file.Coordinates{ @@ -730,3 +730,32 @@ func Test_directPackageFiles(t *testing.T) { require.Equal(t, s, got) } + +func Test_useSPDXIdentifierOverDerivedSyftArtifactID(t *testing.T) { + doc := &spdx.Document{ + SPDXVersion: "SPDX-2.3", + Packages: []*spdx.Package{ + { + PackageName: "some-package", + PackageSPDXIdentifier: "1", // important! + PackageVersion: "1.0.5", + Files: []*spdx.File{ + { + FileName: "some-file", + FileSPDXIdentifier: "2", + Checksums: []spdx.Checksum{ + { + Algorithm: "SHA1", + Value: "a8d733c64f9123", + }, + }, + }, + }, + }, + }, + } + s, err := ToSyftModel(doc) + + assert.Nil(t, err) + assert.NotNil(t, s.Artifacts.Packages.Package("1")) +} diff --git a/syft/format/cyclonedxjson/encoder_test.go b/syft/format/cyclonedxjson/encoder_test.go index 90af61412..00ece4a8a 100644 --- a/syft/format/cyclonedxjson/encoder_test.go +++ b/syft/format/cyclonedxjson/encoder_test.go @@ -3,6 +3,7 @@ package cyclonedxjson import ( "bytes" "flag" + "regexp" "strings" "testing" @@ -116,6 +117,14 @@ func TestCycloneDxImageEncoder(t *testing.T) { func redactor(values ...string) testutil.Redactor { return testutil.NewRedactions(). WithValuesRedacted(values...). + WithPatternRedactorSpec( + testutil.PatternReplacement{ + // only the source component bom-ref (not package or other component bom-refs) + Search: regexp.MustCompile(`"component": \{[^}]*"bom-ref":\s*"(?P.+)"[^}]*}`), + Groups: []string{"redact"}, // use the regex to anchore the search, but only replace bytes within the capture group + Replace: "redacted", + }, + ). WithPatternRedactors( map[string]string{ // UUIDs @@ -126,9 +135,6 @@ func redactor(values ...string) testutil.Redactor { // image hashes `sha256:[A-Fa-f0-9]{64}`: `sha256:redacted`, - - // BOM refs - `"bom-ref":\s*"[^"]+"`: `"bom-ref":"redacted"`, }, ) } diff --git a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index e4e7bd752..264e3ac9c 100644 --- a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -17,14 +17,14 @@ ] }, "component": { - "bom-ref":"redacted", + "bom-ref": "redacted", "type": "file", "name": "some/path" } }, "components": [ { - "bom-ref":"redacted", + "bom-ref": "4dd25c6ee16b729a", "type": "library", "name": "package-1", "version": "1.0.1", @@ -61,7 +61,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=39392bb5e270f669", "type": "library", "name": "package-2", "version": "2.0.1", @@ -91,7 +91,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "os:debian@1.2.3", "type": "operating-system", "name": "debian", "version": "1.2.3", diff --git a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 62750e9e6..3237b30d9 100644 --- a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -17,7 +17,7 @@ ] }, "component": { - "bom-ref":"redacted", + "bom-ref": "redacted", "type": "container", "name": "user-image-input", "version": "sha256:redacted" @@ -25,7 +25,7 @@ }, "components": [ { - "bom-ref":"redacted", + "bom-ref": "72567175418f73f8", "type": "library", "name": "package-1", "version": "1.0.1", @@ -66,7 +66,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=4b756c6f6fb127a3", "type": "library", "name": "package-2", "version": "2.0.1", @@ -100,7 +100,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "os:debian@1.2.3", "type": "operating-system", "name": "debian", "version": "1.2.3", diff --git a/syft/format/cyclonedxxml/encoder_test.go b/syft/format/cyclonedxxml/encoder_test.go index cb0ae4a3b..fec64142e 100644 --- a/syft/format/cyclonedxxml/encoder_test.go +++ b/syft/format/cyclonedxxml/encoder_test.go @@ -90,16 +90,24 @@ func TestCycloneDxImageEncoder(t *testing.T) { func redactor(values ...string) testutil.Redactor { return testutil.NewRedactions(). WithValuesRedacted(values...). + WithPatternRedactorSpec( + testutil.PatternReplacement{ + // only the source component bom-ref (not package or other component bom-refs) + Search: regexp.MustCompile(``), + Groups: []string{"redact"}, // use the regex to anchore the search, but only replace bytes within the capture group + Replace: "redacted", + }, + ). WithPatternRedactors( map[string]string{ // dates `([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `redacted`, - // image hashes and BOM refs + // image hashes `sha256:[A-Za-z0-9]{64}`: `sha256:redacted`, - // serial numbers and BOM refs - `(serialNumber|bom-ref)="[^"]+"`: `$1="redacted"`, + // serial numbers + `(serialNumber)="[^"]+"`: `$1="redacted"`, }, ) } diff --git a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index 9a9f7bce8..57a48832c 100644 --- a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -16,7 +16,7 @@ - + package-1 1.0.1 @@ -34,7 +34,7 @@ /some/path/pkg1 - + package-2 2.0.1 cpe:2.3:*:some:package:2:*:*:*:*:*:*:* @@ -47,7 +47,7 @@ 0 - + debian 1.2.3 debian diff --git a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 12c99a5fa..7c55b107a 100644 --- a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -11,13 +11,13 @@ - + user-image-input sha256:redacted - + package-1 1.0.1 @@ -36,7 +36,7 @@ /somefile-1.txt - + package-2 2.0.1 cpe:2.3:*:some:package:2:*:*:*:*:*:*:* @@ -50,7 +50,7 @@ 0 - + debian 1.2.3 debian diff --git a/syft/format/internal/cyclonedxutil/helpers/decoder.go b/syft/format/internal/cyclonedxutil/helpers/decoder.go index c4c706e38..83845e061 100644 --- a/syft/format/internal/cyclonedxutil/helpers/decoder.go +++ b/syft/format/internal/cyclonedxutil/helpers/decoder.go @@ -5,7 +5,6 @@ import ( "github.com/CycloneDX/cyclonedx-go" - "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" @@ -66,12 +65,16 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary: p := decodeComponent(component) idMap[component.BOMRef] = p - syftID := extractSyftPacakgeID(component.BOMRef) - if syftID != "" { - idMap[syftID] = p + if component.BOMRef != "" { + // always prefer the IDs from the SBOM over derived IDs + p.OverrideID(artifact.ID(component.BOMRef)) + } else { + p.SetID() + } + syftID := p.ID() + if syftID != "" { + idMap[string(syftID)] = p } - // TODO there must be a better way than needing to call this manually: - p.SetID() s.Artifacts.Packages.Add(*p) } @@ -82,19 +85,6 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str } } -func extractSyftPacakgeID(i string) string { - instance, err := packageurl.FromString(i) - if err != nil { - return "" - } - for _, q := range instance.Qualifiers { - if q.Key == "package-id" { - return q.Value - } - } - return "" -} - func linuxReleaseFromComponents(components []cyclonedx.Component) *linux.Release { for i := range components { component := &components[i] diff --git a/syft/format/internal/cyclonedxutil/helpers/decoder_test.go b/syft/format/internal/cyclonedxutil/helpers/decoder_test.go index af08bac2d..b5c1f3c27 100644 --- a/syft/format/internal/cyclonedxutil/helpers/decoder_test.go +++ b/syft/format/internal/cyclonedxutil/helpers/decoder_test.go @@ -421,3 +421,39 @@ func Test_decodeDependencies(t *testing.T) { }) } } + +func Test_useBomRefOverDerivedSyftArtifactID(t *testing.T) { + + packageWithId := cyclonedx.Component{ + BOMRef: "pkg:maven/org.springframework.boot/spring-boot-starter-test?package-id=646a5a71a4abeee0", + Type: cyclonedx.ComponentTypeLibrary, + Name: "spring-boot-starter-test", + Version: "", + PackageURL: "pkg:maven/org.springframework.boot/spring-boot-starter-test", + } + + packageWithoutId := cyclonedx.Component{ + BOMRef: "pkg:maven/org.springframework.boot/spring-boot-starter-webflux", + Type: cyclonedx.ComponentTypeLibrary, + Name: "spring-boot-starter-webflux", + Version: "", + PackageURL: "pkg:maven/org.springframework.boot/spring-boot-starter-webflux", + } + + bom := cyclonedx.BOM{Metadata: nil, + Components: &[]cyclonedx.Component{ + packageWithId, + packageWithoutId, + }} + + s, err := ToSyftModel(&bom) + + assert.Nil(t, err) + assert.NotNil(t, s.Artifacts.Packages.Package("pkg:maven/org.springframework.boot/spring-boot-starter-test?package-id=646a5a71a4abeee0")) + + pkgsWithoutID := s.Artifacts.Packages.PackagesByName(packageWithoutId.Name) + + assert.Len(t, pkgsWithoutID, 1) + assert.NotEqual(t, "", pkgsWithoutID[0].ID()) + +} diff --git a/syft/format/internal/testutil/redactor.go b/syft/format/internal/testutil/redactor.go index 7dab19359..ef5d1f67c 100644 --- a/syft/format/internal/testutil/redactor.go +++ b/syft/format/internal/testutil/redactor.go @@ -28,6 +28,7 @@ func (r RedactorFn) Redact(b []byte) []byte { type PatternReplacement struct { Search *regexp.Regexp + Groups []string Replace string } @@ -39,7 +40,67 @@ func NewPatternReplacement(r *regexp.Regexp) PatternReplacement { } func (p PatternReplacement) Redact(b []byte) []byte { - return p.Search.ReplaceAll(b, []byte(p.Replace)) + if len(p.Groups) == 0 { + return p.Search.ReplaceAll(b, []byte(p.Replace)) + } + + return p.redactNamedGroups(b) +} + +func (p PatternReplacement) redactNamedGroups(b []byte) []byte { + groupsToReplace := make(map[string]bool) + for _, g := range p.Groups { + groupsToReplace[g] = true + } + + subexpNames := p.Search.SubexpNames() + + return p.Search.ReplaceAllFunc(b, func(match []byte) []byte { + indexes := p.Search.FindSubmatchIndex(match) + if indexes == nil { + return match + } + + result := make([]byte, len(match)) + copy(result, match) + + // keep track of the offset as we replace groups + offset := 0 + + // process each named group + for i, name := range subexpNames { + // skip the full match (i==0) and groups we don't want to replace + if i == 0 || !groupsToReplace[name] { + continue + } + + // get the start and end positions of this group + startPos := indexes[2*i] + endPos := indexes[2*i+1] + + // skip if the group didn't match + if startPos < 0 || endPos < 0 { + continue + } + + // adjust positions based on previous replacements + startPos += offset + endPos += offset + + // replace the group with our replacement text + beforeGroup := result[:startPos] + afterGroup := result[endPos:] + + // calculate the new offset + oldLen := endPos - startPos + newLen := len(p.Replace) + offset += (newLen - oldLen) + + result = append(beforeGroup, append([]byte(p.Replace), afterGroup...)...) //nolint:gocritic + } + + return result + }) } // Replace by value ////////////////////////////// @@ -86,6 +147,13 @@ func (r *Redactions) WithPatternRedactors(values map[string]string) *Redactions return r } +func (r *Redactions) WithPatternRedactorSpec(values ...PatternReplacement) *Redactions { + for _, v := range values { + r.redactors = append(r.redactors, v) + } + return r +} + func (r *Redactions) WithValueRedactors(values map[string]string) *Redactions { for k, v := range values { r.redactors = append(r.redactors, diff --git a/syft/format/syftjson/to_syft_model.go b/syft/format/syftjson/to_syft_model.go index 1c1566fbd..da56497fe 100644 --- a/syft/format/syftjson/to_syft_model.go +++ b/syft/format/syftjson/to_syft_model.go @@ -351,9 +351,7 @@ func toSyftPackage(p model.Package, idAliases map[string]string) pkg.Package { Metadata: p.Metadata, } - // we don't know if this package ID is truly unique, however, we need to trust the user input in case there are - // external references to it. That is, we can't derive our own ID (using pkg.SetID()) since consumers won't - // be able to historically interact with data that references the IDs from the original SBOM document being decoded now. + // always prefer the IDs from the SBOM over derived IDs out.OverrideID(artifact.ID(p.ID)) // this alias mapping is currently defunct, but could be useful in the future. diff --git a/syft/internal/fileresolver/container_image_squash_test.go b/syft/internal/fileresolver/container_image_squash_test.go index 7df268051..7525f8947 100644 --- a/syft/internal/fileresolver/container_image_squash_test.go +++ b/syft/internal/fileresolver/container_image_squash_test.go @@ -512,6 +512,7 @@ func Test_imageSquashResolver_resolvesLinks(t *testing.T) { func compareLocations(t *testing.T, expected, actual []file.Location) { t.Helper() ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{}) + ignoreUnexportedCoord := cmpopts.IgnoreUnexported(file.Coordinates{}) ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") @@ -520,6 +521,7 @@ func compareLocations(t *testing.T, expected, actual []file.Location) { if d := cmp.Diff(expected, actual, ignoreUnexported, + ignoreUnexportedCoord, ignoreFS, ignoreMetadata, ); d != "" { diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 297e5aedb..a9fc65317 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -27,7 +27,7 @@ type Package struct { Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) CPEs []cpe.CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) - Metadata interface{} // additional data found while parsing the package source + Metadata any // additional data found while parsing the package source } func (p *Package) OverrideID(id artifact.ID) {