diff --git a/syft/format/common/spdxhelpers/to_syft_model.go b/syft/format/common/spdxhelpers/to_syft_model.go index d1db08c3d..96b218f44 100644 --- a/syft/format/common/spdxhelpers/to_syft_model.go +++ b/syft/format/common/spdxhelpers/to_syft_model.go @@ -499,14 +499,15 @@ func extractPkgInfo(p *spdx.Package) pkgInfo { func toSyftPackage(p *spdx.Package) pkg.Package { info := extractPkgInfo(p) sP := &pkg.Package{ - Type: info.typ, - Name: p.PackageName, - Version: p.PackageVersion, - Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), - CPEs: extractCPEs(p), - PURL: purlValue(info.purl), - Language: info.lang, - Metadata: extractMetadata(p, info), + Type: info.typ, + Name: p.PackageName, + Version: p.PackageVersion, + Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), + CPEs: extractCPEs(p), + PURL: purlValue(info.purl), + Language: info.lang, + Metadata: extractMetadata(p, info), + SupplementalData: []any{artifact.ID(p.PackageSPDXIdentifier)}, } sP.SetID() diff --git a/syft/format/common/spdxhelpers/to_syft_model_test.go b/syft/format/common/spdxhelpers/to_syft_model_test.go index e8526bab5..3593cf3a0 100644 --- a/syft/format/common/spdxhelpers/to_syft_model_test.go +++ b/syft/format/common/spdxhelpers/to_syft_model_test.go @@ -589,12 +589,18 @@ func Test_convertToAndFromFormat(t *testing.T) { got, err := ToSyftModel(doc) require.NoError(t, err) + for _, p := range got.Artifacts.Packages.Sorted() { + // all decoders should be setting an ID of sorts here (another test will verify the correctness of the value) + assert.NotEmpty(t, p.SupplementalData) + } + if diff := cmp.Diff(&s, got, cmpopts.IgnoreUnexported(artifact.Relationship{}), cmpopts.IgnoreUnexported(file.LocationSet{}), cmpopts.IgnoreUnexported(pkg.Collection{}), cmpopts.IgnoreUnexported(pkg.Package{}), cmpopts.IgnoreUnexported(pkg.LicenseSet{}), + cmpopts.IgnoreFields(pkg.Package{}, "SupplementalData"), // this is used by decoders to store additional data from the original format 
cmpopts.IgnoreFields(sbom.Artifacts{}, "FileMetadata", "FileDigests"), ); diff != "" { t.Fatalf("packages do not match:\n%s", diff) @@ -664,7 +670,7 @@ func Test_directPackageFiles(t *testing.T) { Packages: []*spdx.Package{ { PackageName: "some-package", - PackageSPDXIdentifier: "1", + PackageSPDXIdentifier: "1", // important! PackageVersion: "1.0.5", Files: []*spdx.File{ { @@ -686,10 +692,11 @@ func Test_directPackageFiles(t *testing.T) { require.NoError(t, err) p := pkg.Package{ - Name: "some-package", - Version: "1.0.5", + Name: "some-package", + Version: "1.0.5", + SupplementalData: []any{artifact.ID("1")}, // set by the decoders from the original element ID } - p.SetID() + p.OverrideID("1") // the same as the spdxID on the package element f := file.Location{ LocationData: file.LocationData{ Coordinates: file.Coordinates{ diff --git a/syft/format/cyclonedxjson/encoder_test.go b/syft/format/cyclonedxjson/encoder_test.go index 90af61412..00ece4a8a 100644 --- a/syft/format/cyclonedxjson/encoder_test.go +++ b/syft/format/cyclonedxjson/encoder_test.go @@ -3,6 +3,7 @@ package cyclonedxjson import ( "bytes" "flag" + "regexp" "strings" "testing" @@ -116,6 +117,14 @@ func TestCycloneDxImageEncoder(t *testing.T) { func redactor(values ...string) testutil.Redactor { return testutil.NewRedactions(). WithValuesRedacted(values...). + WithPatternRedactorSpec( + testutil.PatternReplacement{ + // only the source component bom-ref (not package or other component bom-refs) + Search: regexp.MustCompile(`"component": \{[^}]*"bom-ref":\s*"(?P<redact>.+)"[^}]*}`), + Groups: []string{"redact"}, // use the regex to anchor the search, but only replace bytes within the capture group + Replace: "redacted", + }, + ). 
WithPatternRedactors( map[string]string{ // UUIDs @@ -126,9 +135,6 @@ func redactor(values ...string) testutil.Redactor { // image hashes `sha256:[A-Fa-f0-9]{64}`: `sha256:redacted`, - - // BOM refs - `"bom-ref":\s*"[^"]+"`: `"bom-ref":"redacted"`, }, ) } diff --git a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index e4e7bd752..264e3ac9c 100644 --- a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -17,14 +17,14 @@ ] }, "component": { - "bom-ref":"redacted", + "bom-ref": "redacted", "type": "file", "name": "some/path" } }, "components": [ { - "bom-ref":"redacted", + "bom-ref": "4dd25c6ee16b729a", "type": "library", "name": "package-1", "version": "1.0.1", @@ -61,7 +61,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=39392bb5e270f669", "type": "library", "name": "package-2", "version": "2.0.1", @@ -91,7 +91,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "os:debian@1.2.3", "type": "operating-system", "name": "debian", "version": "1.2.3", diff --git a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 62750e9e6..3237b30d9 100644 --- a/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ b/syft/format/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -17,7 +17,7 @@ ] }, "component": { - "bom-ref":"redacted", + "bom-ref": "redacted", "type": "container", "name": "user-image-input", "version": "sha256:redacted" @@ -25,7 +25,7 @@ }, "components": [ { - "bom-ref":"redacted", + "bom-ref": "72567175418f73f8", "type": "library", "name": "package-1", "version": "1.0.1", @@ -66,7 +66,7 @@ ] }, { - 
"bom-ref":"redacted", + "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=4b756c6f6fb127a3", "type": "library", "name": "package-2", "version": "2.0.1", @@ -100,7 +100,7 @@ ] }, { - "bom-ref":"redacted", + "bom-ref": "os:debian@1.2.3", "type": "operating-system", "name": "debian", "version": "1.2.3", diff --git a/syft/format/cyclonedxxml/encoder_test.go b/syft/format/cyclonedxxml/encoder_test.go index cb0ae4a3b..fec64142e 100644 --- a/syft/format/cyclonedxxml/encoder_test.go +++ b/syft/format/cyclonedxxml/encoder_test.go @@ -90,16 +90,24 @@ func TestCycloneDxImageEncoder(t *testing.T) { func redactor(values ...string) testutil.Redactor { return testutil.NewRedactions(). WithValuesRedacted(values...). + WithPatternRedactorSpec( + testutil.PatternReplacement{ + // only the source component bom-ref (not package or other component bom-refs) + Search: regexp.MustCompile(``), + Groups: []string{"redact"}, // use the regex to anchore the search, but only replace bytes within the capture group + Replace: "redacted", + }, + ). 
WithPatternRedactors( map[string]string{ // dates `([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `redacted`, - // image hashes and BOM refs + // image hashes `sha256:[A-Za-z0-9]{64}`: `sha256:redacted`, - // serial numbers and BOM refs - `(serialNumber|bom-ref)="[^"]+"`: `$1="redacted"`, + // serial numbers + `(serialNumber)="[^"]+"`: `$1="redacted"`, }, ) } diff --git a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index 9a9f7bce8..57a48832c 100644 --- a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -16,7 +16,7 @@ - + package-1 1.0.1 @@ -34,7 +34,7 @@ /some/path/pkg1 - + package-2 2.0.1 cpe:2.3:*:some:package:2:*:*:*:*:*:*:* @@ -47,7 +47,7 @@ 0 - + debian 1.2.3 debian diff --git a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 12c99a5fa..7c55b107a 100644 --- a/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ b/syft/format/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -11,13 +11,13 @@ - + user-image-input sha256:redacted - + package-1 1.0.1 @@ -36,7 +36,7 @@ /somefile-1.txt - + package-2 2.0.1 cpe:2.3:*:some:package:2:*:*:*:*:*:*:* @@ -50,7 +50,7 @@ 0 - + debian 1.2.3 debian diff --git a/syft/format/internal/cyclonedxutil/helpers/component.go b/syft/format/internal/cyclonedxutil/helpers/component.go index 526094ac5..e04515ff1 100644 --- a/syft/format/internal/cyclonedxutil/helpers/component.go +++ b/syft/format/internal/cyclonedxutil/helpers/component.go @@ -6,6 +6,7 @@ import ( "github.com/CycloneDX/cyclonedx-go" 
"github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/internal/packagemetadata" "github.com/anchore/syft/syft/pkg" @@ -84,12 +85,13 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { } p := &pkg.Package{ - Name: c.Name, - Version: c.Version, - Locations: decodeLocations(values), - Licenses: pkg.NewLicenseSet(decodeLicenses(c)...), - CPEs: decodeCPEs(c), - PURL: c.PackageURL, + Name: c.Name, + Version: c.Version, + Locations: decodeLocations(values), + Licenses: pkg.NewLicenseSet(decodeLicenses(c)...), + CPEs: decodeCPEs(c), + PURL: c.PackageURL, + SupplementalData: []any{artifact.ID(c.BOMRef)}, } DecodeInto(p, values, "syft:package", CycloneDXFields) diff --git a/syft/format/internal/cyclonedxutil/helpers/decoder.go b/syft/format/internal/cyclonedxutil/helpers/decoder.go index c4c706e38..3f30d04b6 100644 --- a/syft/format/internal/cyclonedxutil/helpers/decoder.go +++ b/syft/format/internal/cyclonedxutil/helpers/decoder.go @@ -70,7 +70,6 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str if syftID != "" { idMap[syftID] = p } - // TODO there must be a better way than needing to call this manually: p.SetID() s.Artifacts.Packages.Add(*p) } diff --git a/syft/format/internal/testutil/redactor.go b/syft/format/internal/testutil/redactor.go index 7dab19359..ef5d1f67c 100644 --- a/syft/format/internal/testutil/redactor.go +++ b/syft/format/internal/testutil/redactor.go @@ -28,6 +28,7 @@ func (r RedactorFn) Redact(b []byte) []byte { type PatternReplacement struct { Search *regexp.Regexp + Groups []string Replace string } @@ -39,7 +40,67 @@ func NewPatternReplacement(r *regexp.Regexp) PatternReplacement { } func (p PatternReplacement) Redact(b []byte) []byte { - return p.Search.ReplaceAll(b, []byte(p.Replace)) + if len(p.Groups) == 0 { + return p.Search.ReplaceAll(b, []byte(p.Replace)) + } + + return p.redactNamedGroups(b) +} + +func (p 
PatternReplacement) redactNamedGroups(b []byte) []byte { + groupsToReplace := make(map[string]bool) + for _, g := range p.Groups { + groupsToReplace[g] = true + } + + subexpNames := p.Search.SubexpNames() + + return p.Search.ReplaceAllFunc(b, func(match []byte) []byte { + indexes := p.Search.FindSubmatchIndex(match) + if indexes == nil { + return match + } + + result := make([]byte, len(match)) + copy(result, match) + + // keep track of the offset as we replace groups + offset := 0 + + // process each named group + for i, name := range subexpNames { + // skip the full match (i==0) and groups we don't want to replace + if i == 0 || !groupsToReplace[name] { + continue + } + + // get the start and end positions of this group + startPos := indexes[2*i] + endPos := indexes[2*i+1] + + // skip if the group didn't match + if startPos < 0 || endPos < 0 { + continue + } + + // adjust positions based on previous replacements + startPos += offset + endPos += offset + + // replace the group with our replacement text + beforeGroup := result[:startPos] + afterGroup := result[endPos:] + + // calculate the new offset + oldLen := endPos - startPos + newLen := len(p.Replace) + offset += (newLen - oldLen) + + result = append(beforeGroup, append([]byte(p.Replace), afterGroup...)...) 
//nolint:gocritic + } + + return result + }) } // Replace by value ////////////////////////////// @@ -86,6 +147,13 @@ func (r *Redactions) WithPatternRedactors(values map[string]string) *Redactions return r } +func (r *Redactions) WithPatternRedactorSpec(values ...PatternReplacement) *Redactions { + for _, v := range values { + r.redactors = append(r.redactors, v) + } + return r +} + func (r *Redactions) WithValueRedactors(values map[string]string) *Redactions { for k, v := range values { r.redactors = append(r.redactors, diff --git a/syft/pkg/cataloger/bitnami/cataloger_test.go b/syft/pkg/cataloger/bitnami/cataloger_test.go index 38c1eb823..32ef0856b 100644 --- a/syft/pkg/cataloger/bitnami/cataloger_test.go +++ b/syft/pkg/cataloger/bitnami/cataloger_test.go @@ -3,6 +3,8 @@ package bitnami import ( "testing" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/artifact" @@ -493,6 +495,28 @@ func TestBitnamiCataloger(t *testing.T) { t.Run(tt.name, func(t *testing.T) { pkgtest.NewCatalogTester(). FromDirectory(t, tt.fixture). + WithCompareOptions(cmpopts.IgnoreFields(pkg.Package{}, "SupplementalData")). + ExpectsAssertion( + func(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) { + for _, p := range pkgs { + // assert there are supplemental data as artifact.ID and ID() matches + assert.NotEmpty(t, p.SupplementalData) + var id artifact.ID + for _, data := range p.SupplementalData { + switch d := data.(type) { + case artifact.ID: + id = d + break + case artifact.Identifiable: + id = d.ID() + break + } + } + assert.NotEmpty(t, id) + assert.Equal(t, p.ID(), id) + } + }, + ). Expects(tt.wantPkgs, tt.wantRelationships). WithErrorAssertion(tt.wantErr). 
TestCataloger(t, NewCataloger()) diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 297e5aedb..8dacdc945 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -17,17 +17,18 @@ import ( // Package represents an application or library that has been bundled into a distributable format. // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? type Package struct { - id artifact.ID `hash:"ignore"` - Name string // the package name - Version string // the version of the package - FoundBy string `hash:"ignore" cyclonedx:"foundBy"` // the specific cataloger that discovered this package - Locations file.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) - Licenses LicenseSet // licenses discovered with the package metadata - Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) - Type Type `cyclonedx:"type"` // the package type (e.g. 
Npm, Yarn, Python, Rpm, Deb, etc) - CPEs []cpe.CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) - PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) - Metadata interface{} // additional data found while parsing the package source + id artifact.ID `hash:"ignore"` + Name string // the package name + Version string // the version of the package + FoundBy string `hash:"ignore" cyclonedx:"foundBy"` // the specific cataloger that discovered this package + Locations file.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) + Licenses LicenseSet // licenses discovered with the package metadata + Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) + Type Type `cyclonedx:"type"` // the package type (e.g. 
Npm, Yarn, Python, Rpm, Deb, etc) + CPEs []cpe.CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) + PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) + Metadata any // additional data found while parsing the package source + SupplementalData []any `hash:"ignore"` // additional data that is not part of the package metadata nor expressed in output formats } func (p *Package) OverrideID(id artifact.ID) { @@ -35,6 +36,24 @@ func (p *Package) OverrideID(id artifact.ID) { } func (p *Package) SetID() { + for _, data := range p.SupplementalData { + switch d := data.(type) { + case artifact.ID: + if d == "" { + continue + } + p.id = d + return + case artifact.Identifiable: + id := d.ID() + if id == "" { + continue + } + p.id = id + return + } + } + id, err := artifact.IDByHash(p) if err != nil { // TODO: what to do in this case?