diff --git a/internal/anchore/import_package_sbom_test.go b/internal/anchore/import_package_sbom_test.go index c6939c91f..3628a07c4 100644 --- a/internal/anchore/import_package_sbom_test.go +++ b/internal/anchore/import_package_sbom_test.go @@ -56,14 +56,14 @@ func sbomFixture() sbom.SBOM { Name: "name", Version: "version", FoundBy: "foundBy", - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "path", FileSystemID: "layerID", }, }, - }, + ), Licenses: []string{"license"}, Language: pkg.Python, Type: pkg.PythonPkg, diff --git a/internal/formats/common/cyclonedxhelpers/component.go b/internal/formats/common/cyclonedxhelpers/component.go index 0815fff16..949d6d004 100644 --- a/internal/formats/common/cyclonedxhelpers/component.go +++ b/internal/formats/common/cyclonedxhelpers/component.go @@ -13,8 +13,9 @@ import ( func encodeComponent(p pkg.Package) cyclonedx.Component { props := encodeProperties(p, "syft:package") props = append(props, encodeCPEs(p)...) - if len(p.Locations) > 0 { - props = append(props, encodeProperties(p.Locations, "syft:location")...) + locations := p.Locations.ToSlice() + if len(locations) > 0 { + props = append(props, encodeProperties(locations, "syft:location")...) } if hasMetadata(p) { props = append(props, encodeProperties(p.Metadata, "syft:metadata")...) @@ -73,10 +74,13 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { return p } -func decodeLocations(vals map[string]string) []source.Location { +func decodeLocations(vals map[string]string) source.LocationSet { v := common.Decode(reflect.TypeOf([]source.Location{}), vals, "syft:location", CycloneDXFields) - out, _ := v.([]source.Location) - return out + out, ok := v.([]source.Location) + if !ok { + out = nil + } + return source.NewLocationSet(out...) } func decodePackageMetadata(vals map[string]string, c *cyclonedx.Component, typ pkg.MetadataType) interface{} { diff --git a/internal/formats/common/cyclonedxhelpers/component_test.go b/internal/formats/common/cyclonedxhelpers/component_test.go index b267abe6d..4a6dc5051 100644 --- a/internal/formats/common/cyclonedxhelpers/component_test.go +++ b/internal/formats/common/cyclonedxhelpers/component_test.go @@ -26,9 +26,9 @@ func Test_encodeComponentProperties(t *testing.T) { name: "from apk", input: pkg.Package{ FoundBy: "cataloger", - Locations: []source.Location{ - {Coordinates: source.Coordinates{RealPath: "test"}}, - }, + Locations: source.NewLocationSet( + source.Location{Coordinates: source.Coordinates{RealPath: "test"}}, + ), Metadata: pkg.ApkMetadata{ Package: "libc-utils", OriginPackage: "libc-dev", diff --git a/internal/formats/common/spdxhelpers/source_info.go b/internal/formats/common/spdxhelpers/source_info.go index ed5e2600b..1806f72df 100644 --- a/internal/formats/common/spdxhelpers/source_info.go +++ b/internal/formats/common/spdxhelpers/source_info.go @@ -35,7 +35,7 @@ func SourceInfo(p pkg.Package) string { answer = "acquired package info from the following paths" } var paths []string - for _, l := range p.Locations { + for _, l := range p.Locations.ToSlice() { paths = append(paths, l.RealPath) } diff --git a/internal/formats/common/spdxhelpers/source_info_test.go b/internal/formats/common/spdxhelpers/source_info_test.go index 56f6c5bbd..4016ad2be 100644 --- a/internal/formats/common/spdxhelpers/source_info_test.go +++ b/internal/formats/common/spdxhelpers/source_info_test.go @@ -18,10 +18,10 @@ func Test_SourceInfo(t *testing.T) { name: "locations are captured", input: pkg.Package{ // note: no type given - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/a-place", "/b-place"), source.NewVirtualLocation("/c-place", "/d-place"), - }, + ), }, expected: []string{ "from the following paths", diff --git a/internal/formats/common/testutils/utils.go b/internal/formats/common/testutils/utils.go index 6b5b5f670..809889456 100644 --- a/internal/formats/common/testutils/utils.go +++ b/internal/formats/common/testutils/utils.go @@ -157,9 +157,9 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { catalog.Add(pkg.Package{ Name: "package-1", Version: "1.0.1", - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), - }, + ), Type: pkg.PythonPkg, FoundBy: "the-cataloger-1", Language: pkg.Python, @@ -177,9 +177,9 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { catalog.Add(pkg.Package{ Name: "package-2", Version: "2.0.1", - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), - }, + ), Type: pkg.DebPkg, FoundBy: "the-cataloger-2", MetadataType: pkg.DpkgMetadataType, @@ -234,9 +234,9 @@ func newDirectoryCatalog() *pkg.Catalog { Version: "1.0.1", Type: pkg.PythonPkg, FoundBy: "the-cataloger-1", - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewLocation("/some/path/pkg1"), - }, + ), Language: pkg.Python, MetadataType: pkg.PythonPackageMetadataType, Licenses: []string{"MIT"}, @@ -259,9 +259,9 @@ func newDirectoryCatalog() *pkg.Catalog { Version: "2.0.1", Type: pkg.DebPkg, FoundBy: "the-cataloger-2", - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewLocation("/some/path/pkg1"), - }, + ), MetadataType: pkg.DpkgMetadataType, Metadata: pkg.DpkgMetadata{ Package: "package-2", diff --git a/internal/formats/github/encoder.go b/internal/formats/github/encoder.go index 967c87e64..2e39f7c10 100644 --- a/internal/formats/github/encoder.go +++ b/internal/formats/github/encoder.go @@ -58,8 +58,9 @@ func toSnapshotMetadata(s *sbom.SBOM) Metadata { } func filesystem(p pkg.Package) string { - if len(p.Locations) > 0 { - return p.Locations[0].FileSystemID + locations := p.Locations.ToSlice() + if len(locations) > 0 { + return locations[0].FileSystemID } return "" } @@ -76,8 +77,9 @@ func toPath(s source.Metadata, p pkg.Package) string { if inputPath == "." { inputPath = "" } - if len(p.Locations) > 0 { - location := p.Locations[0] + locations := p.Locations.ToSlice() + if len(locations) > 0 { + location := locations[0] packagePath := location.RealPath if location.VirtualPath != "" { packagePath = location.VirtualPath diff --git a/internal/formats/github/encoder_test.go b/internal/formats/github/encoder_test.go index a08f72ce4..91ac83769 100644 --- a/internal/formats/github/encoder_test.go +++ b/internal/formats/github/encoder_test.go @@ -35,32 +35,38 @@ func Test_toGithubModel(t *testing.T) { { Name: "pkg-1", Version: "1.0.1", - Locations: []source.Location{{ - Coordinates: source.Coordinates{ - RealPath: "/usr/lib", - FileSystemID: "fsid-1", + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/usr/lib", + FileSystemID: "fsid-1", + }, }, - }}, + ), }, { Name: "pkg-2", Version: "2.0.2", - Locations: []source.Location{{ - Coordinates: source.Coordinates{ - RealPath: "/usr/lib", - FileSystemID: "fsid-1", + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/usr/lib", + FileSystemID: "fsid-1", + }, }, - }}, + ), }, { Name: "pkg-3", Version: "3.0.3", - Locations: []source.Location{{ - Coordinates: source.Coordinates{ - RealPath: "/etc", - FileSystemID: "fsid-1", + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/etc", + FileSystemID: "fsid-1", + }, }, - }}, + ), }, } { p.PURL = packageurl.NewPackageURL( diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden index c0eb44cd3..22f7729c7 100644 --- a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden @@ -3,18 +3,18 @@ "name": "/some/path", "spdxVersion": "SPDX-2.2", "creationInfo": { - "created": "2021-12-20T19:12:47.869816Z", + "created": "2022-03-30T21:48:28.297464Z", "creators": [ "Organization: Anchore, Inc", "Tool: syft-[not provided]" ], - "licenseListVersion": "3.15" + "licenseListVersion": "3.16" }, "dataLicense": "CC0-1.0", - "documentNamespace": "https://anchore.com/syft/dir/some/path-4b896ded-7852-4e31-b764-136b53bdf346", + "documentNamespace": "https://anchore.com/syft/dir/some/path-e188d59b-76f6-4c7f-a9f2-1ae7d0577781", "packages": [ { - "SPDXID": "SPDXRef-1d97af55efe9512f", + "SPDXID": "SPDXRef-b85dbb4e6ece5082", "name": "package-1", "licenseConcluded": "MIT", "downloadLocation": "NOASSERTION", @@ -36,7 +36,7 @@ "versionInfo": "1.0.1" }, { - "SPDXID": "SPDXRef-ad3d1c4abd84bf75", + "SPDXID": "SPDXRef-ceda99598967ae8d", "name": "package-2", "licenseConcluded": "NONE", "downloadLocation": "NOASSERTION", diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden index e44ef6cf1..7e97a75fe 100644 --- a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden @@ -3,18 +3,18 @@ "name": "user-image-input", "spdxVersion": "SPDX-2.2", "creationInfo": { - "created": "2021-12-20T19:13:07.647486Z", + "created": "2022-03-30T21:48:28.303986Z", "creators": [ "Organization: Anchore, Inc", "Tool: syft-[not provided]" ], - "licenseListVersion": "3.15" + "licenseListVersion": "3.16" }, "dataLicense": "CC0-1.0", - "documentNamespace": "https://anchore.com/syft/image/user-image-input-174da656-1824-4bd3-8604-28919f8a65bc", + "documentNamespace": "https://anchore.com/syft/image/user-image-input-9e4f4190-c5ae-4e31-a852-d1ab71357516", "packages": [ { - "SPDXID": "SPDXRef-d16127444133b5c1", + "SPDXID": "SPDXRef-2a46171f91c8d4bc", "name": "package-1", "licenseConcluded": "MIT", "downloadLocation": "NOASSERTION", @@ -36,7 +36,7 @@ "versionInfo": "1.0.1" }, { - "SPDXID": "SPDXRef-24907357f3705420", + "SPDXID": "SPDXRef-ae77680e9b1d087e", "name": "package-2", "licenseConcluded": "NONE", "downloadLocation": "NOASSERTION", diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden b/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden index c483fa49b..c1b1d2b79 100644 Binary files a/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden and b/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden differ diff --git a/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden index 84a7de316..7959c2f0d 100644 --- a/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden +++ b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden @@ -2,16 +2,16 @@ SPDXVersion: SPDX-2.2 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT DocumentName: /some/path -DocumentNamespace: https://anchore.com/syft/dir/some/path-4b90f56d-d596-4ad8-b6a5-17f7d801350d +DocumentNamespace: https://anchore.com/syft/dir/some/path-71aa3553-1a73-405f-9f1f-6347d6d4593b LicenseListVersion: 3.16 Creator: Organization: Anchore, Inc Creator: Tool: syft-[not provided] -Created: 2022-02-10T21:09:27Z +Created: 2022-03-30T21:48:22Z ##### Package: package-2 PackageName: package-2 -SPDXID: SPDXRef-Package-deb-package-2-ad3d1c4abd84bf75 +SPDXID: SPDXRef-Package-deb-package-2-ceda99598967ae8d PackageVersion: 2.0.1 PackageDownloadLocation: NOASSERTION FilesAnalyzed: false @@ -24,7 +24,7 @@ ExternalRef: PACKAGE_MANAGER purl a-purl-2 ##### Package: package-1 PackageName: package-1 -SPDXID: SPDXRef-Package-python-package-1-1d97af55efe9512f +SPDXID: SPDXRef-Package-python-package-1-b85dbb4e6ece5082 PackageVersion: 1.0.1 PackageDownloadLocation: NOASSERTION FilesAnalyzed: false diff --git a/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden index f54713441..4d9011b1c 100644 --- a/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden +++ b/internal/formats/spdx22tagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden @@ -2,16 +2,16 @@ SPDXVersion: SPDX-2.2 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT DocumentName: user-image-input -DocumentNamespace: https://anchore.com/syft/image/user-image-input-26a2def6-53d0-4504-b99a-a046832508ac +DocumentNamespace: https://anchore.com/syft/image/user-image-input-e46e20f4-43a4-40e7-9f82-fd55b8a89e5f LicenseListVersion: 3.16 Creator: Organization: Anchore, Inc Creator: Tool: syft-[not provided] -Created: 2022-02-10T21:09:27Z +Created: 2022-03-30T21:48:22Z ##### Package: package-2 PackageName: package-2 -SPDXID: SPDXRef-Package-deb-package-2-73f796c846875b9e +SPDXID: SPDXRef-Package-deb-package-2-ae77680e9b1d087e PackageVersion: 2.0.1 PackageDownloadLocation: NOASSERTION FilesAnalyzed: false @@ -24,7 +24,7 @@ ExternalRef: PACKAGE_MANAGER purl a-purl-2 ##### Package: package-1 PackageName: package-1 -SPDXID: SPDXRef-Package-python-package-1-d9527e708c11f8b9 +SPDXID: SPDXRef-Package-python-package-1-2a46171f91c8d4bc PackageVersion: 1.0.1 PackageDownloadLocation: NOASSERTION FilesAnalyzed: false diff --git a/internal/formats/syftjson/encoder_test.go b/internal/formats/syftjson/encoder_test.go index 10d2576ca..9d8087d5e 100644 --- a/internal/formats/syftjson/encoder_test.go +++ b/internal/formats/syftjson/encoder_test.go @@ -42,13 +42,13 @@ func TestEncodeFullJSONDocument(t *testing.T) { p1 := pkg.Package{ Name: "package-1", Version: "1.0.1", - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a/place/a", }, }, - }, + ), Type: pkg.PythonPkg, FoundBy: "the-cataloger-1", Language: pkg.Python, @@ -68,13 +68,13 @@ func TestEncodeFullJSONDocument(t *testing.T) { p2 := pkg.Package{ Name: "package-2", Version: "2.0.1", - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/b/place/b", }, }, - }, + ), Type: pkg.DebPkg, FoundBy: "the-cataloger-2", MetadataType: pkg.DpkgMetadataType, diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden index 92b55b015..71d089e19 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "1d97af55efe9512f", + "id": "b85dbb4e6ece5082", "name": "package-1", "version": "1.0.1", "type": "python", @@ -36,7 +36,7 @@ } }, { - "id": "ad3d1c4abd84bf75", + "id": "ceda99598967ae8d", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden index 728667abd..2fe4ba0dc 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "d9a7c58726ab4bef", + "id": "b3fa3ee64756b0c6", "name": "package-1", "version": "1.0.1", "type": "python", @@ -31,7 +31,7 @@ } }, { - "id": "ac462e450060da2c", + "id": "b324f4d9ee5413fe", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden index baa8ed26a..a2f5673d0 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "d9527e708c11f8b9", + "id": "2a46171f91c8d4bc", "name": "package-1", "version": "1.0.1", "type": "python", @@ -32,7 +32,7 @@ } }, { - "id": "73f796c846875b9e", + "id": "ae77680e9b1d087e", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/to_format_model.go b/internal/formats/syftjson/to_format_model.go index 7d4feee4a..ccf881a0d 100644 --- a/internal/formats/syftjson/to_format_model.go +++ b/internal/formats/syftjson/to_format_model.go @@ -175,8 +175,9 @@ func toPackageModel(p pkg.Package) model.Package { licenses = p.Licenses } - var coordinates = make([]source.Coordinates, len(p.Locations)) - for i, l := range p.Locations { + locations := p.Locations.ToSlice() + var coordinates = make([]source.Coordinates, len(locations)) + for i, l := range locations { coordinates[i] = l.Coordinates } diff --git a/internal/formats/syftjson/to_syft_model.go b/internal/formats/syftjson/to_syft_model.go index c8a40eb93..bac4df94b 100644 --- a/internal/formats/syftjson/to_syft_model.go +++ b/internal/formats/syftjson/to_syft_model.go @@ -53,7 +53,8 @@ func toSyftRelationships(doc *model.Document, catalog *pkg.Catalog, relationship for _, p := range catalog.Sorted() { idMap[string(p.ID())] = p - for _, l := range p.Locations { + locations := p.Locations.ToSlice() + for _, l := range locations { idMap[string(l.Coordinates.ID())] = l.Coordinates } } @@ -166,7 +167,7 @@ func toSyftPackage(p model.Package, idAliases map[string]string) pkg.Package { Name: p.Name, Version: p.Version, FoundBy: p.FoundBy, - Locations: locations, + Locations: source.NewLocationSet(locations...), Licenses: p.Licenses, Language: p.Language, Type: p.Type, diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index 1c8e8db56..0e4d1098e 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -10,11 +10,28 @@ import ( "github.com/jinzhu/copier" ) +type orderedIDSet struct { + slice []artifact.ID +} + +func (s *orderedIDSet) add(ids ...artifact.ID) { +loopNewIDs: + for _, newID := range ids { + for _, existingID := range s.slice { + if existingID == newID { + continue loopNewIDs + } + } + s.slice = append(s.slice, newID) + } +} + // Catalog represents a collection of Packages. type Catalog struct { byID map[artifact.ID]Package - idsByType map[Type][]artifact.ID - idsByPath map[string][]artifact.ID // note: this is real path or virtual path + idsByName map[string]orderedIDSet + idsByType map[Type]orderedIDSet + idsByPath map[string]orderedIDSet // note: this is real path or virtual path lock sync.RWMutex } @@ -22,8 +39,9 @@ type Catalog struct { func NewCatalog(pkgs ...Package) *Catalog { catalog := Catalog{ byID: make(map[artifact.ID]Package), - idsByType: make(map[Type][]artifact.ID), - idsByPath: make(map[string][]artifact.ID), + idsByName: make(map[string]orderedIDSet), + idsByType: make(map[Type]orderedIDSet), + idsByPath: make(map[string]orderedIDSet), } for _, p := range pkgs { @@ -55,7 +73,12 @@ func (c *Catalog) Package(id artifact.ID) *Package { // PackagesByPath returns all packages that were discovered from the given path. func (c *Catalog) PackagesByPath(path string) []Package { - return c.Packages(c.idsByPath[path]) + return c.Packages(c.idsByPath[path].slice) +} + +// PackagesByName returns all packages that were discovered with a matching name. +func (c *Catalog) PackagesByName(name string) []Package { + return c.Packages(c.idsByName[name].slice) } // Packages returns all packages for the given ID. @@ -81,26 +104,58 @@ func (c *Catalog) Add(p Package) { id = p.ID() } - // store by package ID - c.byID[id] = p + if existing, exists := c.byID[id]; exists { + // there is already a package with this fingerprint merge the existing record with the new one + if err := existing.merge(p); err != nil { + log.Warnf("failed to merge packages: %+v", err) + } else { + c.addPathsToIndex(p) + } + return + } - // store by package type - c.idsByType[p.Type] = append(c.idsByType[p.Type], id) + c.addToIndex(p) +} - // store by file location paths +func (c *Catalog) addToIndex(p Package) { + c.byID[p.id] = p + c.addNameToIndex(p) + c.addTypeToIndex(p) + c.addPathsToIndex(p) +} + +func (c *Catalog) addNameToIndex(p Package) { + nameIndex := c.idsByName[p.Name] + nameIndex.add(p.id) + c.idsByName[p.Name] = nameIndex +} + +func (c *Catalog) addTypeToIndex(p Package) { + typeIndex := c.idsByType[p.Type] + typeIndex.add(p.id) + c.idsByType[p.Type] = typeIndex +} + +func (c *Catalog) addPathsToIndex(p Package) { observedPaths := internal.NewStringSet() - for _, l := range p.Locations { + for _, l := range p.Locations.ToSlice() { if l.RealPath != "" && !observedPaths.Contains(l.RealPath) { - c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], id) + c.addPathToIndex(p.id, l.RealPath) observedPaths.Add(l.RealPath) } if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) { - c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], id) + c.addPathToIndex(p.id, l.VirtualPath) observedPaths.Add(l.VirtualPath) } } } +func (c *Catalog) addPathToIndex(id artifact.ID, path string) { + pathIndex := c.idsByPath[path] + pathIndex.add(id) + c.idsByPath[path] = pathIndex +} + // Enumerate all packages for the given type(s), enumerating all packages if no type is specified. func (c *Catalog) Enumerate(types ...Type) <-chan Package { channel := make(chan Package) @@ -124,7 +179,7 @@ func (c *Catalog) Enumerate(types ...Type) <-chan Package { continue } } - for _, id := range ids { + for _, id := range ids.slice { p := c.Package(id) if p != nil { channel <- *p @@ -145,8 +200,10 @@ func (c *Catalog) Sorted(types ...Type) (pkgs []Package) { sort.SliceStable(pkgs, func(i, j int) bool { if pkgs[i].Name == pkgs[j].Name { if pkgs[i].Version == pkgs[j].Version { - if pkgs[i].Type == pkgs[j].Type && len(pkgs[i].Locations) > 0 && len(pkgs[j].Locations) > 0 { - return pkgs[i].Locations[0].String() < pkgs[j].Locations[0].String() + iLocations := pkgs[i].Locations.ToSlice() + jLocations := pkgs[j].Locations.ToSlice() + if pkgs[i].Type == pkgs[j].Type && len(iLocations) > 0 && len(jLocations) > 0 { + return iLocations[0].String() < jLocations[0].String() } return pkgs[i].Type < pkgs[j].Type } diff --git a/syft/pkg/catalog_test.go b/syft/pkg/catalog_test.go index 005714c0c..43f52b87b 100644 --- a/syft/pkg/catalog_test.go +++ b/syft/pkg/catalog_test.go @@ -3,11 +3,11 @@ package pkg import ( "testing" - "github.com/stretchr/testify/assert" - - "github.com/scylladb/go-set/strset" - + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/source" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type expectedIndexes struct { @@ -19,17 +19,17 @@ func TestCatalogAddPopulatesIndex(t *testing.T) { var pkgs = []Package{ { - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/a/path", "/another/path"), source.NewVirtualLocation("/b/path", "/bee/path"), - }, + ), Type: RpmPkg, }, { - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/d/path", "/another/path"), - }, + ), Type: NpmPkg, }, } @@ -106,47 +106,169 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) { p1 := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/b/path", "/another/path"), source.NewVirtualLocation("/b/path", "/b/path"), - }, + ), Type: RpmPkg, Name: "Package-1", } p2 := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/b/path", "/b/path"), - }, + ), Type: RpmPkg, Name: "Package-2", } + p2Dup := Package{ + Locations: source.NewLocationSet( + source.NewVirtualLocation("/b/path", "/another/path"), + source.NewVirtualLocation("/b/path", "/c/path/b/dup"), + ), + Type: RpmPkg, + Name: "Package-2", + } + tests := []struct { - name string - pkg Package + name string + pkgs []Package + paths []string }{ { name: "multiple locations with shared path", - pkg: p1, + pkgs: []Package{p1}, + paths: []string{ + "/b/path", + "/another/path", + }, }, { name: "one location with shared path", - pkg: p2, + pkgs: []Package{p2}, + paths: []string{ + "/b/path", + }, + }, + { + name: "two instances with similar locations", + pkgs: []Package{p2, p2Dup}, + paths: []string{ + "/b/path", + "/another/path", + "/c/path/b/dup", // this updated the path index on merge + }, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual := NewCatalog(test.pkg).PackagesByPath("/b/path") - if len(actual) != 1 { - t.Errorf("expected exactly one package path, got %d", len(actual)) + for _, path := range test.paths { + actualPackages := NewCatalog(test.pkgs...).PackagesByPath(path) + require.Len(t, actualPackages, 1) } }) } } +func TestCatalog_MergeRecords(t *testing.T) { + var tests = []struct { + name string + pkgs []Package + expectedLocations []source.Location + }{ + { + name: "multiple Locations with shared path", + pkgs: []Package{ + { + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/b/path", + FileSystemID: "a", + }, + VirtualPath: "/another/path", + }, + ), + Type: RpmPkg, + }, + { + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/b/path", + FileSystemID: "b", + }, + VirtualPath: "/another/path", + }, + ), + Type: RpmPkg, + }, + }, + expectedLocations: []source.Location{ + { + Coordinates: source.Coordinates{ + RealPath: "/b/path", + FileSystemID: "a", + }, + VirtualPath: "/another/path", + }, + { + Coordinates: source.Coordinates{ + RealPath: "/b/path", + FileSystemID: "b", + }, + VirtualPath: "/another/path", + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := NewCatalog(tt.pkgs...).PackagesByPath("/b/path") + require.Len(t, actual, 1) + assert.Equal(t, tt.expectedLocations, actual[0].Locations.ToSlice()) + }) + } +} + func TestCatalog_EnumerateNilCatalog(t *testing.T) { var c *Catalog assert.Empty(t, c.Enumerate()) } + +func Test_idOrderedSet_add(t *testing.T) { + tests := []struct { + name string + input []artifact.ID + expected []artifact.ID + }{ + { + name: "elements deduplicated when added", + input: []artifact.ID{ + "1", "2", "3", "4", "1", "2", "3", "4", "1", "2", "3", "4", + }, + expected: []artifact.ID{ + "1", "2", "3", "4", + }, + }, + { + name: "elements retain ordering when added", + input: []artifact.ID{ + "4", "3", "2", "1", + }, + expected: []artifact.ID{ + "4", "3", "2", "1", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var s orderedIDSet + s.add(tt.input...) + assert.Equal(t, tt.expected, s.slice) + }) + } +} diff --git a/syft/pkg/cataloger/common/cpe/filter_test.go b/syft/pkg/cataloger/common/cpe/filter_test.go index 6200093e0..d57bc8b72 100644 --- a/syft/pkg/cataloger/common/cpe/filter_test.go +++ b/syft/pkg/cataloger/common/cpe/filter_test.go @@ -16,7 +16,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) { }{ { name: "go case (filter out)", - cpe: mustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Type: pkg.JenkinsPluginPkg, }, @@ -24,7 +24,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) { }, { name: "ignore jenkins plugins with unique name", - cpe: mustCPE("cpe:2.3:a:name:ci-jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:name:ci-jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Type: pkg.JenkinsPluginPkg, }, @@ -32,7 +32,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) { }, { name: "ignore java packages", - cpe: mustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Type: pkg.JavaPkg, }, @@ -55,7 +55,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) { }{ { name: "filter out mismatched name (cloudbees vendor)", - cpe: mustCPE("cpe:2.3:a:cloudbees:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:cloudbees:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "not-j*nkins", Type: pkg.JavaPkg, @@ -64,7 +64,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) { }, { name: "filter out mismatched name (jenkins vendor)", - cpe: mustCPE("cpe:2.3:a:jenkins:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:jenkins:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "not-j*nkins", Type: pkg.JavaPkg, @@ -73,7 +73,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) { }, { name: "filter out mismatched name (any vendor)", - cpe: mustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "not-j*nkins", Type: pkg.JavaPkg, @@ -82,7 +82,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) { }, { name: "ignore packages with the name jenkins", - cpe: mustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "jenkins-thing", Type: pkg.JavaPkg, @@ -91,7 +91,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) { }, { name: "ignore product names that are not exactly 'jenkins'", - cpe: mustCPE("cpe:2.3:a:*:jenkins-something-else:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins-something-else:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "not-j*nkins", Type: pkg.JavaPkg, @@ -115,7 +115,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) { }{ { name: "filter out mismatched name (atlassian vendor)", - cpe: mustCPE("cpe:2.3:a:atlassian:jira:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:atlassian:jira:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "something-client", Type: pkg.JavaPkg, @@ -124,7 +124,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) { }, { name: "filter out mismatched name (jira vendor)", - cpe: mustCPE("cpe:2.3:a:jira:jira:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:jira:jira:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "something-client", Type: pkg.JavaPkg, @@ -133,7 +133,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) { }, { name: "filter out mismatched name (any vendor)", - cpe: mustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "something-client", Type: pkg.JavaPkg, @@ -142,7 +142,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) { }, { name: "ignore package names that do not have 'client'", - cpe: mustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "jira-thing", Type: pkg.JavaPkg, @@ -151,7 +151,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) { }, { name: "ignore product names that are not exactly 'jira'", - cpe: mustCPE("cpe:2.3:a:*:jira-something-else:3.2:*:*:*:*:*:*:*"), + cpe: pkg.MustCPE("cpe:2.3:a:*:jira-something-else:3.2:*:*:*:*:*:*:*"), pkg: pkg.Package{ Name: "not-j*ra", Type: pkg.JavaPkg, diff --git a/syft/pkg/cataloger/common/cpe/generate.go b/syft/pkg/cataloger/common/cpe/generate.go index f8360555d..4ed77ff2b 100644 --- a/syft/pkg/cataloger/common/cpe/generate.go +++ b/syft/pkg/cataloger/common/cpe/generate.go @@ -55,7 +55,7 @@ func Generate(p pkg.Package) []pkg.CPE { // filter out any known combinations that don't accurately represent this package cpes = filter(cpes, p, cpeFilters...) - sort.Sort(BySpecificity(cpes)) + sort.Sort(pkg.CPEBySpecificity(cpes)) return cpes } diff --git a/syft/pkg/cataloger/common/generic_cataloger.go b/syft/pkg/cataloger/common/generic_cataloger.go index 7d2e3d477..859b77ef3 100644 --- a/syft/pkg/cataloger/common/generic_cataloger.go +++ b/syft/pkg/cataloger/common/generic_cataloger.go @@ -58,9 +58,8 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, for _, p := range discoveredPackages { p.FoundBy = c.upstreamCataloger - p.Locations = append(p.Locations, location) + p.Locations.Add(location) p.SetID() - packages = append(packages, *p) } diff --git a/syft/pkg/cataloger/common/generic_cataloger_test.go b/syft/pkg/cataloger/common/generic_cataloger_test.go index 68dfe6fd1..2e3a7234a 100644 --- a/syft/pkg/cataloger/common/generic_cataloger_test.go +++ b/syft/pkg/cataloger/common/generic_cataloger_test.go @@ -53,7 +53,7 @@ func TestGenericCataloger(t *testing.T) { assert.Len(t, actualPkgs, len(expectedPkgs)) for _, p := range actualPkgs { - ref := p.Locations[0] + ref := p.Locations.ToSlice()[0] exP, ok := expectedPkgs[ref.RealPath] if !ok { t.Errorf("missing expected pkg: ref=%+v", ref) diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index f0c3b3ea8..4b9146fd7 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -60,7 +60,7 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti for i := range pkgs { p := &pkgs[i] p.FoundBy = c.Name() - p.Locations = []source.Location{dbLocation} + p.Locations.Add(dbLocation) // the current entry only has what may have been listed in the status file, however, there are additional // files that are listed in multiple other locations. We should retrieve them all and merge the file lists @@ -88,7 +88,7 @@ func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pk p.Licenses = parseLicensesFromCopyright(copyrightReader) // keep a record of the file where this was discovered - p.Locations = append(p.Locations, *copyrightLocation) + p.Locations.Add(*copyrightLocation) } } @@ -117,7 +117,7 @@ loopNewFiles: p.Metadata = metadata // persist location information from each new source of information - p.Locations = append(p.Locations, infoLocations...) + p.Locations.Add(infoLocations...) } func getAdditionalFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) ([]pkg.DpkgFileRecord, []source.Location) { diff --git a/syft/pkg/cataloger/deb/cataloger_test.go b/syft/pkg/cataloger/deb/cataloger_test.go index 72a837f06..37e6eef50 100644 --- a/syft/pkg/cataloger/deb/cataloger_test.go +++ b/syft/pkg/cataloger/deb/cataloger_test.go @@ -1,6 +1,7 @@ package deb import ( + "github.com/stretchr/testify/assert" "testing" "github.com/anchore/syft/syft/file" @@ -115,15 +116,13 @@ func TestDpkgCataloger(t *testing.T) { for idx := range actual { a := &actual[idx] // we will test the sources separately - var sourcesList = make([]string, len(a.Locations)) - for i, s := range a.Locations { + var sourcesList = make([]string, len(a.Locations.ToSlice())) + for i, s := range a.Locations.ToSlice() { sourcesList[i] = s.RealPath } - a.Locations = nil + a.Locations = source.NewLocationSet() - for _, d := range deep.Equal(sourcesList, test.sources[a.Name]) { - t.Errorf("diff: %+v", d) - } + assert.ElementsMatch(t, sourcesList, test.sources[a.Name]) } // test remaining fields... diff --git a/syft/pkg/cataloger/golang/parse_go_bin.go b/syft/pkg/cataloger/golang/parse_go_bin.go index 1a4f0f10f..1fc980d62 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin.go +++ b/syft/pkg/cataloger/golang/parse_go_bin.go @@ -44,14 +44,12 @@ func newGoBinaryPackage(dep *debug.Module, goVersion, architecture string, locat } p := pkg.Package{ - FoundBy: catalogerName, - Name: dep.Path, - Version: dep.Version, - Language: pkg.Go, - Type: pkg.GoModulePkg, - Locations: []source.Location{ - location, - }, + FoundBy: catalogerName, + Name: dep.Path, + Version: dep.Version, + Language: pkg.Go, + Type: pkg.GoModulePkg, + Locations: source.NewLocationSet(location), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goVersion, diff --git a/syft/pkg/cataloger/golang/parse_go_bin_test.go b/syft/pkg/cataloger/golang/parse_go_bin_test.go index 2cd1cfcbd..8ac560d8c 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin_test.go +++ b/syft/pkg/cataloger/golang/parse_go_bin_test.go @@ -133,14 +133,14 @@ func TestBuildGoPkgInfo(t *testing.T) { FoundBy: catalogerName, Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", }, }, - }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, @@ -190,14 +190,14 @@ func TestBuildGoPkgInfo(t *testing.T) { Version: "v0.2.1", Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", }, }, - }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, @@ -252,14 +252,14 @@ func TestBuildGoPkgInfo(t *testing.T) { Version: "v0.2.1", Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", }, }, - }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, @@ -273,14 +273,14 @@ func TestBuildGoPkgInfo(t *testing.T) { Version: "v0.0.0-20210222170800-9c70f9b80bcf", Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", }, }, - }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, @@ -327,12 +327,14 @@ func TestBuildGoPkgInfo(t *testing.T) { Version: "v0.0.0-20211006194710-c8a6f5223071", Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", - }}}, + }, + }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, @@ -344,14 +346,14 @@ func TestBuildGoPkgInfo(t *testing.T) { Version: "v0.0.0-20210916214954-140adaaadfaf", Language: pkg.Go, Type: pkg.GoModulePkg, - Locations: []source.Location{ - { + Locations: source.NewLocationSet( + source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", FileSystemID: "layer-id", }, }, - }, + ), MetadataType: pkg.GolangBinMetadataType, Metadata: pkg.GolangBinMetadata{ GoCompiledVersion: goCompiledVersion, diff --git a/syft/pkg/cataloger/python/package_cataloger.go b/syft/pkg/cataloger/python/package_cataloger.go index ac0a214bc..a966e865a 100644 --- a/syft/pkg/cataloger/python/package_cataloger.go +++ b/syft/pkg/cataloger/python/package_cataloger.go @@ -80,7 +80,7 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad Name: metadata.Name, Version: metadata.Version, FoundBy: c.Name(), - Locations: sources, + Locations: source.NewLocationSet(sources...), Licenses: licenses, Language: pkg.Python, Type: pkg.PythonPkg, diff --git a/syft/pkg/cataloger/python/package_cataloger_test.go b/syft/pkg/cataloger/python/package_cataloger_test.go index ccca539a9..7479f9ef3 100644 --- a/syft/pkg/cataloger/python/package_cataloger_test.go +++ b/syft/pkg/cataloger/python/package_cataloger_test.go @@ -144,7 +144,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { t.Fatal(err) } - test.expectedPackage.Locations = locations + test.expectedPackage.Locations = source.NewLocationSet(locations...) actual, _, err := NewPythonPackageCataloger().Catalog(resolver) if err != nil { diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go index d4f3607eb..d9fd1a02d 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go @@ -1,6 +1,7 @@ package python import ( + "github.com/anchore/syft/syft/source" "os" "testing" @@ -48,7 +49,14 @@ func TestParsePipFileLock(t *testing.T) { t.Fatalf("failed to parse requirements: %+v", err) } - if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { + if diff := cmp.Diff(expected, actual, + cmp.AllowUnexported(pkg.Package{}), + cmp.Comparer( + func(x, y source.LocationSet) bool { + return cmp.Equal(x.ToSlice(), y.ToSlice()) + }, + ), + ); diff != "" { t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) } } diff --git a/syft/pkg/cataloger/python/parse_requirements_test.go b/syft/pkg/cataloger/python/parse_requirements_test.go index d57d2e36a..3a1871059 100644 --- a/syft/pkg/cataloger/python/parse_requirements_test.go +++ b/syft/pkg/cataloger/python/parse_requirements_test.go @@ -1,6 +1,7 @@ package python import ( + "github.com/anchore/syft/syft/source" "os" "testing" @@ -42,7 +43,14 @@ func TestParseRequirementsTxt(t *testing.T) { t.Fatalf("failed to parse requirements: %+v", err) } - if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { + if diff := cmp.Diff(expected, actual, + cmp.AllowUnexported(pkg.Package{}), + cmp.Comparer( + func(x, y source.LocationSet) bool { + return cmp.Equal(x.ToSlice(), y.ToSlice()) + }, + ), + ); diff != "" { t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) } } diff --git a/syft/pkg/cataloger/python/parse_setup_test.go b/syft/pkg/cataloger/python/parse_setup_test.go index 41fc19156..b5614fa3d 100644 --- a/syft/pkg/cataloger/python/parse_setup_test.go +++ b/syft/pkg/cataloger/python/parse_setup_test.go @@ -1,6 +1,7 @@ package python import ( + "github.com/anchore/syft/syft/source" "os" "testing" @@ -54,7 +55,14 @@ func TestParseSetup(t *testing.T) { t.Fatalf("failed to parse requirements: %+v", err) } - if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { + if diff := cmp.Diff(expected, actual, + cmp.AllowUnexported(pkg.Package{}), + cmp.Comparer( + func(x, y source.LocationSet) bool { + return cmp.Equal(x.ToSlice(), y.ToSlice()) + }, + ), + ); diff != "" { t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) } } diff --git a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go index 74f694c09..c1b794162 100644 --- a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go +++ b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go @@ -63,7 +63,7 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re p := pkg.Package{ Name: entry.Name, Version: toELVersion(metadata), - Locations: []source.Location{dbLocation}, + Locations: source.NewLocationSet(dbLocation), FoundBy: catalogerName, Type: pkg.RpmPkg, MetadataType: pkg.RpmdbMetadataType, diff --git a/syft/pkg/cataloger/rpmdb/parse_rpmdb_test.go b/syft/pkg/cataloger/rpmdb/parse_rpmdb_test.go index 0bad62396..d378e36e9 100644 --- a/syft/pkg/cataloger/rpmdb/parse_rpmdb_test.go +++ b/syft/pkg/cataloger/rpmdb/parse_rpmdb_test.go @@ -71,7 +71,7 @@ func TestParseRpmDB(t *testing.T) { "dive": { Name: "dive", Version: "0.9.2-1", - Locations: []source.Location{dbLocation}, + Locations: source.NewLocationSet(dbLocation), FoundBy: catalogerName, Type: pkg.RpmPkg, MetadataType: pkg.RpmdbMetadataType, @@ -98,7 +98,7 @@ func TestParseRpmDB(t *testing.T) { "dive": { Name: "dive", Version: "0.9.2-1", - Locations: []source.Location{dbLocation}, + Locations: source.NewLocationSet(dbLocation), FoundBy: catalogerName, Type: pkg.RpmPkg, MetadataType: pkg.RpmdbMetadataType, diff --git a/syft/pkg/cataloger/common/cpe/sort_by_specificity.go b/syft/pkg/cpe_by_specificity.go similarity index 84% rename from syft/pkg/cataloger/common/cpe/sort_by_specificity.go rename to syft/pkg/cpe_by_specificity.go index f68050982..73cce8d04 100644 --- a/syft/pkg/cataloger/common/cpe/sort_by_specificity.go +++ b/syft/pkg/cpe_by_specificity.go @@ -1,4 +1,4 @@ -package cpe +package pkg import ( "sort" @@ -6,15 +6,15 @@ import ( "github.com/facebookincubator/nvdtools/wfn" ) -var _ sort.Interface = (*BySpecificity)(nil) +var _ sort.Interface = (*CPEBySpecificity)(nil) -type BySpecificity []wfn.Attributes +type CPEBySpecificity []wfn.Attributes -func (c BySpecificity) Len() int { return len(c) } +func (c CPEBySpecificity) Len() int { return len(c) } -func (c BySpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] } +func (c CPEBySpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] } -func (c BySpecificity) Less(i, j int) bool { +func (c CPEBySpecificity) Less(i, j int) bool { iScore := weightedCountForSpecifiedFields(c[i]) jScore := weightedCountForSpecifiedFields(c[j]) diff --git a/syft/pkg/cataloger/common/cpe/sort_by_specificity_test.go b/syft/pkg/cpe_by_specificity_test.go similarity index 87% rename from syft/pkg/cataloger/common/cpe/sort_by_specificity_test.go rename to syft/pkg/cpe_by_specificity_test.go index f979bd8fb..54f8e9f13 100644 --- a/syft/pkg/cataloger/common/cpe/sort_by_specificity_test.go +++ b/syft/pkg/cpe_by_specificity_test.go @@ -1,40 +1,32 @@ -package cpe +package pkg import ( "sort" "testing" - "github.com/anchore/syft/syft/pkg" "github.com/stretchr/testify/assert" ) -func mustCPE(c string) pkg.CPE { - return must(pkg.NewCPE(c)) -} - -func must(c pkg.CPE, e error) pkg.CPE { - if e != nil { - panic(e) - } - return c +func mustCPE(c string) CPE { + return must(NewCPE(c)) } func TestCPESpecificity(t *testing.T) { tests := []struct { name string - input []pkg.CPE - expected []pkg.CPE + input []CPE + expected []CPE }{ { name: "sort strictly by wfn *", - input: []pkg.CPE{ + input: []CPE{ mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"), }, - expected: []pkg.CPE{ + expected: []CPE{ mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"), @@ -44,7 +36,7 @@ func TestCPESpecificity(t *testing.T) { }, { name: "sort strictly by field length", - input: []pkg.CPE{ + input: []CPE{ mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:333:*:*:*:*:1:*:*"), @@ -52,7 +44,7 @@ func TestCPESpecificity(t *testing.T) { mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"), }, - expected: []pkg.CPE{ + expected: []CPE{ mustCPE("cpe:2.3:a:1:666666:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"), @@ -63,7 +55,7 @@ func TestCPESpecificity(t *testing.T) { }, { name: "sort by mix of field length and specificity", - input: []pkg.CPE{ + input: []CPE{ mustCPE("cpe:2.3:a:1:666666:*:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:*:1:1:*:*:*:*:4444:*:*"), mustCPE("cpe:2.3:a:1:*:333:*:*:*:*:*:*:*"), @@ -71,7 +63,7 @@ func TestCPESpecificity(t *testing.T) { mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), }, - expected: []pkg.CPE{ + expected: []CPE{ mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"), @@ -82,7 +74,7 @@ func TestCPESpecificity(t *testing.T) { }, { name: "sort by mix of field length, specificity, dash", - input: []pkg.CPE{ + input: []CPE{ mustCPE("cpe:2.3:a:alpine:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), @@ -90,7 +82,7 @@ func TestCPESpecificity(t *testing.T) { mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), }, - expected: []pkg.CPE{ + expected: []CPE{ mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), @@ -103,7 +95,7 @@ func TestCPESpecificity(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - sort.Sort(BySpecificity(test.input)) + sort.Sort(CPEBySpecificity(test.input)) assert.Equal(t, test.expected, test.input) }) } diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index 8581c218e..baa169a35 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -20,7 +20,7 @@ var jenkinsPluginPomPropertiesGroupIDs = []string{ // JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship. type JavaMetadata struct { - VirtualPath string `json:"virtualPath"` + VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` diff --git a/syft/pkg/merge_cpes.go b/syft/pkg/merge_cpes.go new file mode 100644 index 000000000..cb766a72f --- /dev/null +++ b/syft/pkg/merge_cpes.go @@ -0,0 +1,25 @@ +package pkg + +import ( + "sort" +) + +func mergeCPEs(a, b []CPE) (result []CPE) { + aCPEs := make(map[string]CPE) + + // keep all CPEs from a and create a quick string-based lookup + for _, aCPE := range a { + aCPEs[aCPE.BindToFmtString()] = aCPE + result = append(result, aCPE) + } + + // keep all unique CPEs from b + for _, bCPE := range b { + if _, exists := aCPEs[bCPE.BindToFmtString()]; !exists { + result = append(result, bCPE) + } + } + + sort.Sort(CPEBySpecificity(result)) + return result +} diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 6215bf594..13b19ca37 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -14,18 +14,18 @@ import ( // Package represents an application or library that has been bundled into a distributable format. // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? type Package struct { - id artifact.ID `hash:"ignore"` - Name string // the package name - Version string // the version of the package - FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package - Locations []source.Location // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) - Licenses []string // licenses discovered with the package metadata - Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) - Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) - CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) - PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) (note: this is NOT included in the definition of the ID since all fields on a pURL are derived from other fields) - MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field - Metadata interface{} // additional data found while parsing the package source + id artifact.ID `hash:"ignore"` + Name string // the package name + Version string // the version of the package + FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package + Locations source.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) + Licenses []string // licenses discovered with the package metadata + Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) + Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) + CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) + PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) + MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field + Metadata interface{} // additional data found while parsing the package source } func (p *Package) SetID() { @@ -46,3 +46,21 @@ func (p Package) ID() artifact.ID { func (p Package) String() string { return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id) } + +func (p *Package) merge(other Package) error { + if p.id != other.id { + return fmt.Errorf("cannot merge packages with different IDs: %q vs %q", p.id, other.id) + } + if p.PURL != other.PURL { + log.Warnf("merging packages have with different pURLs: %q=%q vs %q=%q", p.id, p.PURL, other.id, other.PURL) + } + + p.Locations.Add(other.Locations.ToSlice()...) + + p.CPEs = mergeCPEs(p.CPEs, other.CPEs) + + if p.PURL == "" { + p.PURL = other.PURL + } + return nil +} diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index 228726a37..bdbcee8f1 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -3,24 +3,28 @@ package pkg import ( "testing" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/source" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestFingerprint(t *testing.T) { +func TestIDUniqueness(t *testing.T) { + originalLocation := source.Location{ + Coordinates: source.Coordinates{ + RealPath: "39.0742° N, 21.8243° E", + FileSystemID: "Earth", + }, + VirtualPath: "/Ancient-Greece", + } originalPkg := Package{ Name: "pi", Version: "3.14", FoundBy: "Archimedes", - Locations: []source.Location{ - { - Coordinates: source.Coordinates{ - RealPath: "39.0742° N, 21.8243° E", - FileSystemID: "Earth", - }, - VirtualPath: "/Ancient-Greece", - }, - }, + Locations: source.NewLocationSet( + originalLocation, + ), Licenses: []string{ "cc0-1.0", "MIT", @@ -45,9 +49,9 @@ func TestFingerprint(t *testing.T) { // this is a set of differential tests, ensuring that select mutations are reflected in the fingerprint (or not) tests := []struct { - name string - transform func(pkg Package) Package - expectIdentical bool + name string + transform func(pkg Package) Package + expectedIDComparison assert.ComparisonAssertionFunc }{ { name: "go case (no transform)", @@ -55,7 +59,7 @@ func TestFingerprint(t *testing.T) { // do nothing! return pkg }, - expectIdentical: true, + expectedIDComparison: assert.Equal, }, { name: "same metadata is ignored", @@ -72,7 +76,7 @@ func TestFingerprint(t *testing.T) { } return pkg }, - expectIdentical: true, + expectedIDComparison: assert.Equal, }, { name: "licenses order is ignored", @@ -84,7 +88,7 @@ func TestFingerprint(t *testing.T) { } return pkg }, - expectIdentical: true, + expectedIDComparison: assert.Equal, }, { name: "name is reflected", @@ -92,7 +96,42 @@ func TestFingerprint(t *testing.T) { pkg.Name = "new!" return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, + }, + { + name: "location is reflected", + transform: func(pkg Package) Package { + locations := source.NewLocationSet(pkg.Locations.ToSlice()...) + locations.Add(source.NewLocation("/somewhere/new")) + pkg.Locations = locations + return pkg + }, + expectedIDComparison: assert.NotEqual, + }, + { + name: "same path for different filesystem is NOT reflected", + transform: func(pkg Package) Package { + newLocation := originalLocation + newLocation.FileSystemID = "Mars" + + pkg.Locations = source.NewLocationSet(newLocation) + return pkg + }, + expectedIDComparison: assert.Equal, + }, + { + name: "multiple equivalent paths for different filesystem is NOT reflected", + transform: func(pkg Package) Package { + newLocation := originalLocation + newLocation.FileSystemID = "Mars" + + locations := source.NewLocationSet(pkg.Locations.ToSlice()...) + locations.Add(newLocation, originalLocation) + + pkg.Locations = locations + return pkg + }, + expectedIDComparison: assert.Equal, }, { name: "version is reflected", @@ -100,7 +139,7 @@ func TestFingerprint(t *testing.T) { pkg.Version = "new!" return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "licenses is reflected", @@ -108,7 +147,7 @@ func TestFingerprint(t *testing.T) { pkg.Licenses = []string{"new!"} return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "type is reflected", @@ -116,7 +155,7 @@ func TestFingerprint(t *testing.T) { pkg.Type = RustPkg return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "metadata type is reflected", @@ -124,7 +163,7 @@ func TestFingerprint(t *testing.T) { pkg.MetadataType = RustCargoPackageMetadataType return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "CPEs is ignored", @@ -132,7 +171,7 @@ func TestFingerprint(t *testing.T) { pkg.CPEs = []CPE{} return pkg }, - expectIdentical: true, + expectedIDComparison: assert.Equal, }, { name: "pURL is ignored", @@ -140,7 +179,7 @@ func TestFingerprint(t *testing.T) { pkg.PURL = "new!" return pkg }, - expectIdentical: true, + expectedIDComparison: assert.Equal, }, { name: "language is reflected", @@ -148,7 +187,7 @@ func TestFingerprint(t *testing.T) { pkg.Language = Rust return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "foundBy is reflected", @@ -156,7 +195,7 @@ func TestFingerprint(t *testing.T) { pkg.FoundBy = "new!" return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "metadata mutation is reflected", @@ -166,7 +205,7 @@ func TestFingerprint(t *testing.T) { pkg.Metadata = metadata return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "new metadata is reflected", @@ -176,7 +215,7 @@ func TestFingerprint(t *testing.T) { } return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, { name: "nil metadata is reflected", @@ -184,7 +223,7 @@ func TestFingerprint(t *testing.T) { pkg.Metadata = nil return pkg }, - expectIdentical: false, + expectedIDComparison: assert.NotEqual, }, } @@ -199,12 +238,207 @@ func TestFingerprint(t *testing.T) { transformedFingerprint := transformedPkg.ID() assert.NotEmpty(t, transformedFingerprint) - if test.expectIdentical { - assert.Equal(t, originalFingerprint, transformedFingerprint) - } else { - assert.NotEqual(t, originalFingerprint, transformedFingerprint) - } - + test.expectedIDComparison(t, originalFingerprint, transformedFingerprint) + }) + } +} + +func TestPackage_Merge(t *testing.T) { + originalLocation := source.Location{ + Coordinates: source.Coordinates{ + RealPath: "39.0742° N, 21.8243° E", + FileSystemID: "Earth", + }, + VirtualPath: "/Ancient-Greece", + } + + similarLocation := originalLocation + similarLocation.FileSystemID = "Mars" + + tests := []struct { + name string + subject Package + other Package + expected *Package + }{ + { + name: "merge two packages (different cpes + locations)", + subject: Package{ + Name: "pi", + Version: "3.14", + FoundBy: "Archimedes", + Locations: source.NewLocationSet( + originalLocation, + ), + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)), + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + }, + other: Package{ + Name: "pi", + Version: "3.14", + FoundBy: "Archimedes", + Locations: source.NewLocationSet( + similarLocation, // NOTE: difference; we have a different layer but the same path + ), + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:DIFFERENT:pi:3.14:*:*:*:*:math:*:*`)), // NOTE: difference + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + }, + expected: &Package{ + Name: "pi", + Version: "3.14", + FoundBy: "Archimedes", + Locations: source.NewLocationSet( + originalLocation, + similarLocation, // NOTE: merge! + ), + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)), + must(NewCPE(`cpe:2.3:a:DIFFERENT:pi:3.14:*:*:*:*:math:*:*`)), // NOTE: merge! + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + }, + }, + { + name: "error when there are different IDs", + subject: Package{ + Name: "pi", + Version: "3.14", + FoundBy: "Archimedes", + Locations: source.NewLocationSet( + originalLocation, + ), + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)), + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + }, + other: Package{ + Name: "pi-DIFFERENT", // difference + Version: "3.14", + FoundBy: "Archimedes", + Locations: source.NewLocationSet( + originalLocation, + ), + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)), + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.subject.SetID() + tt.other.SetID() + + err := tt.subject.merge(tt.other) + if tt.expected == nil { + require.Error(t, err) + return + } + require.NoError(t, err) + + tt.expected.SetID() + require.Equal(t, tt.expected.id, tt.subject.id) + + if diff := cmp.Diff(*tt.expected, tt.subject, + cmp.AllowUnexported(Package{}), + cmp.Comparer( + func(x, y source.LocationSet) bool { + return cmp.Equal( + x.ToSlice(), y.ToSlice(), + cmp.AllowUnexported(source.Location{}), + cmp.AllowUnexported(file.Reference{}), + ) + }, + ), + ); diff != "" { + t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) + } }) } } diff --git a/syft/pkg/relationships_by_file_ownership_test.go b/syft/pkg/relationships_by_file_ownership_test.go index 69867f270..42844b98e 100644 --- a/syft/pkg/relationships_by_file_ownership_test.go +++ b/syft/pkg/relationships_by_file_ownership_test.go @@ -18,10 +18,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { name: "owns-by-real-path", setup: func(t testing.TB) ([]Package, []artifact.Relationship) { parent := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/a/path", "/another/path"), source.NewVirtualLocation("/b/path", "/bee/path"), - }, + ), Type: RpmPkg, MetadataType: RpmdbMetadataType, Metadata: RpmdbMetadata{ @@ -35,10 +35,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { parent.SetID() child := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/d/path", "/another/path"), - }, + ), Type: NpmPkg, } child.SetID() @@ -61,10 +61,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { name: "owns-by-virtual-path", setup: func(t testing.TB) ([]Package, []artifact.Relationship) { parent := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/a/path", "/some/other/path"), source.NewVirtualLocation("/b/path", "/bee/path"), - }, + ), Type: RpmPkg, MetadataType: RpmdbMetadataType, Metadata: RpmdbMetadata{ @@ -78,10 +78,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { parent.SetID() child := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/c/path", "/another/path"), source.NewLocation("/d/path"), - }, + ), Type: NpmPkg, } child.SetID() @@ -103,10 +103,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { name: "ignore-empty-path", setup: func(t testing.TB) ([]Package, []artifact.Relationship) { parent := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/a/path", "/some/other/path"), source.NewVirtualLocation("/b/path", "/bee/path"), - }, + ), Type: RpmPkg, MetadataType: RpmdbMetadataType, Metadata: RpmdbMetadata{ @@ -121,10 +121,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) { parent.SetID() child := Package{ - Locations: []source.Location{ + Locations: source.NewLocationSet( source.NewVirtualLocation("/c/path", "/another/path"), source.NewLocation("/d/path"), - }, + ), Type: NpmPkg, } diff --git a/syft/source/coordinate_set.go b/syft/source/coordinate_set.go new file mode 100644 index 000000000..0ae080c27 --- /dev/null +++ b/syft/source/coordinate_set.go @@ -0,0 +1,86 @@ +package source + +import ( + "sort" + + "github.com/mitchellh/hashstructure/v2" + "github.com/scylladb/go-set/strset" +) + +type CoordinateSet struct { + set map[Coordinates]struct{} +} + +func NewCoordinateSet(coordinates ...Coordinates) (s CoordinateSet) { + for _, l := range coordinates { + s.Add(l) + } + + return s +} + +func (s *CoordinateSet) Add(coordinates ...Coordinates) { + if s.set == nil { + s.set = make(map[Coordinates]struct{}) + } + for _, l := range coordinates { + s.set[l] = struct{}{} + } +} + +func (s CoordinateSet) Remove(coordinates ...Coordinates) { + if s.set == nil { + return + } + for _, l := range coordinates { + delete(s.set, l) + } +} + +func (s CoordinateSet) Contains(l Coordinates) bool { + if s.set == nil { + return false + } + _, ok := s.set[l] + return ok +} + +func (s CoordinateSet) Paths() []string { + if s.set == nil { + return nil + } + + paths := strset.New() + for _, c := range s.ToSlice() { + paths.Add(c.RealPath) + } + pathSlice := paths.List() + sort.Strings(pathSlice) + return pathSlice +} + +func (s CoordinateSet) ToSlice() []Coordinates { + if s.set == nil { + return nil + } + coordinates := make([]Coordinates, len(s.set)) + idx := 0 + for v := range s.set { + coordinates[idx] = v + idx++ + } + sort.SliceStable(coordinates, func(i, j int) bool { + if coordinates[i].RealPath == coordinates[j].RealPath { + return coordinates[i].FileSystemID < coordinates[j].FileSystemID + } + return coordinates[i].RealPath < coordinates[j].RealPath + }) + return coordinates +} + +func (s CoordinateSet) Hash() (uint64, error) { + return hashstructure.Hash(s.ToSlice(), hashstructure.FormatV2, &hashstructure.HashOptions{ + ZeroNil: true, + SlicesAsSets: true, + }) +} diff --git a/syft/source/coordinate_set_test.go b/syft/source/coordinate_set_test.go new file mode 100644 index 000000000..5601cdc91 --- /dev/null +++ b/syft/source/coordinate_set_test.go @@ -0,0 +1,117 @@ +package source + +import ( + "github.com/anchore/syft/syft/artifact" + "github.com/stretchr/testify/require" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCoordinatesSet(t *testing.T) { + + binA := Coordinates{ + RealPath: "/bin", + FileSystemID: "a", + } + + binB := Coordinates{ + RealPath: "/bin", + FileSystemID: "b", + } + + tests := []struct { + name string + input []Coordinates + expected []Coordinates + }{ + { + name: "de-dup same location", + input: []Coordinates{ + binA, binA, binA, + }, + expected: []Coordinates{ + binA, + }, + }, + { + name: "dont de-dup different filesystem", + input: []Coordinates{ + binB, binA, + }, + expected: []Coordinates{ + binA, binB, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + set := NewCoordinateSet(test.input...) + assert.Equal(t, test.expected, set.ToSlice()) + }) + } +} + +func TestCoordinateSet_Hash(t *testing.T) { + etcA := Coordinates{ + RealPath: "/etc", + FileSystemID: "a", + } + + etcB := Coordinates{ + RealPath: "/etc", + FileSystemID: "b", + } + + binA := Coordinates{ + RealPath: "/bin", + FileSystemID: "a", + } + + binB := Coordinates{ + RealPath: "/bin", + FileSystemID: "b", + } + + tests := []struct { + name string + setA CoordinateSet + setB CoordinateSet + want assert.ComparisonAssertionFunc + }{ + { + name: "empty sets have the same hash", + setA: NewCoordinateSet(), + setB: NewCoordinateSet(), + want: assert.Equal, + }, + { + name: "sets with same elements have the same hash", + setA: NewCoordinateSet(binA, etcA), + setB: NewCoordinateSet(etcA, binA), + want: assert.Equal, + }, + { + name: "sets with different elements have different hashes", + setA: NewCoordinateSet(binA, etcA), + setB: NewCoordinateSet(binA), + want: assert.NotEqual, + }, + { + name: "sets with same paths but different FS IDs have different hashes", + setA: NewCoordinateSet(etcA, binA), + setB: NewCoordinateSet(etcB, binB), + want: assert.NotEqual, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotA, err := artifact.IDByHash(tt.setA) + require.NoError(t, err) + gotB, err := artifact.IDByHash(tt.setB) + require.NoError(t, err) + tt.want(t, gotA, gotB) + }) + } +} diff --git a/syft/source/coordinates.go b/syft/source/coordinates.go index 653670792..c35d3dcc2 100644 --- a/syft/source/coordinates.go +++ b/syft/source/coordinates.go @@ -2,7 +2,6 @@ package source import ( "fmt" - "sort" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" @@ -14,18 +13,6 @@ type Coordinates struct { FileSystemID string `json:"layerID,omitempty" cyclonedx:"layerID"` // An ID representing the filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank. } -// CoordinateSet represents a set of string types. -type CoordinateSet map[Coordinates]struct{} - -// NewCoordinateSet creates a CoordinateSet populated with values from the given slice. -func NewCoordinateSet(start ...Coordinates) CoordinateSet { - ret := make(CoordinateSet) - for _, s := range start { - ret.Add(s) - } - return ret -} - func (c Coordinates) ID() artifact.ID { f, err := artifact.IDByHash(c) if err != nil { @@ -45,37 +32,3 @@ func (c Coordinates) String() string { } return fmt.Sprintf("Location<%s>", str) } - -// Add a string to the set. -func (s CoordinateSet) Add(i Coordinates) { - s[i] = struct{}{} -} - -// Remove a string from the set. -func (s CoordinateSet) Remove(i Coordinates) { - delete(s, i) -} - -// Contains indicates if the given string is contained within the set. -func (s CoordinateSet) Contains(i Coordinates) bool { - _, ok := s[i] - return ok -} - -// ToSlice returns a sorted slice of Locations that are contained within the set. -func (s CoordinateSet) ToSlice() []Coordinates { - ret := make([]Coordinates, len(s)) - idx := 0 - for v := range s { - ret[idx] = v - idx++ - } - - sort.SliceStable(ret, func(i, j int) bool { - if ret[i].RealPath == ret[j].RealPath { - return ret[i].FileSystemID < ret[j].FileSystemID - } - return ret[i].RealPath < ret[j].RealPath - }) - return ret -} diff --git a/syft/source/coordinates_test.go b/syft/source/coordinates_test.go deleted file mode 100644 index e9f8a4a30..000000000 --- a/syft/source/coordinates_test.go +++ /dev/null @@ -1,51 +0,0 @@ -package source - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestCoordinateSet(t *testing.T) { - - binA := Coordinates{ - RealPath: "/bin", - FileSystemID: "a", - } - - binB := Coordinates{ - RealPath: "/bin", - FileSystemID: "b", - } - - tests := []struct { - name string - input []Coordinates - expected []Coordinates - }{ - { - name: "de-dup same location", - input: []Coordinates{ - binA, binA, binA, - }, - expected: []Coordinates{ - binA, - }, - }, - { - name: "dont de-dup different filesystem", - input: []Coordinates{ - binB, binA, - }, - expected: []Coordinates{ - binA, binB, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - assert.Equal(t, test.expected, NewCoordinateSet(test.input...).ToSlice()) - }) - } -} diff --git a/syft/source/location.go b/syft/source/location.go index 3f14ed413..04acee9f5 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -109,3 +109,9 @@ func (l Location) String() string { } return fmt.Sprintf("Location<%s>", str) } + +func (l Location) Equals(other Location) bool { + return l.RealPath == other.RealPath && + l.VirtualPath == other.VirtualPath && + l.FileSystemID == other.FileSystemID +} diff --git a/syft/source/location_set.go b/syft/source/location_set.go new file mode 100644 index 000000000..c2e915921 --- /dev/null +++ b/syft/source/location_set.go @@ -0,0 +1,78 @@ +package source + +import ( + "sort" + + "github.com/mitchellh/hashstructure/v2" +) + +type LocationSet struct { + set map[Location]struct{} +} + +func NewLocationSet(locations ...Location) (s LocationSet) { + for _, l := range locations { + s.Add(l) + } + + return s +} + +func (s *LocationSet) Add(locations ...Location) { + if s.set == nil { + s.set = make(map[Location]struct{}) + } + for _, l := range locations { + s.set[l] = struct{}{} + } +} + +func (s LocationSet) Remove(locations ...Location) { + if s.set == nil { + return + } + for _, l := range locations { + delete(s.set, l) + } +} + +func (s LocationSet) Contains(l Location) bool { + if s.set == nil { + return false + } + _, ok := s.set[l] + return ok +} + +func (s LocationSet) ToSlice() []Location { + if s.set == nil { + return nil + } + locations := make([]Location, len(s.set)) + idx := 0 + for v := range s.set { + locations[idx] = v + idx++ + } + sort.Sort(Locations(locations)) + return locations +} + +func (s *LocationSet) CoordinateSet() CoordinateSet { + if s.set == nil { + return NewCoordinateSet() + } + set := NewCoordinateSet() + for l := range s.set { + set.Add(l.Coordinates) + } + return set +} + +func (s LocationSet) Hash() (uint64, error) { + // access paths and filesystem IDs are not considered when hashing a location set, only the real paths + return hashstructure.Hash(s.CoordinateSet().Paths(), hashstructure.FormatV2, &hashstructure.HashOptions{ + ZeroNil: true, + SlicesAsSets: true, + }) +} diff --git a/syft/source/location_set_test.go b/syft/source/location_set_test.go new file mode 100644 index 000000000..dc80a4962 --- /dev/null +++ b/syft/source/location_set_test.go @@ -0,0 +1,178 @@ +package source + +import ( + "github.com/anchore/syft/syft/artifact" + "github.com/stretchr/testify/require" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLocationSet(t *testing.T) { + + etcHostsLinkVar := Location{ + Coordinates: Coordinates{ + RealPath: "/etc/hosts", + FileSystemID: "a", + }, + VirtualPath: "/var/etc/hosts", + } + + etcHostsLinkHome := Location{ + Coordinates: Coordinates{ + RealPath: "/etc/hosts", + FileSystemID: "a", + }, + VirtualPath: "/home/wagoodman/hosts", + } + + binA := Location{ + Coordinates: Coordinates{ + RealPath: "/bin", + FileSystemID: "a", + }, + VirtualPath: "/usr/bin", + } + + binB := Location{ + Coordinates: Coordinates{ + RealPath: "/bin", + FileSystemID: "b", + }, + VirtualPath: "/usr/bin", + } + + tests := []struct { + name string + input []Location + expected []Location + }{ + { + name: "de-dup same location", + input: []Location{ + binA, binA, binA, + }, + expected: []Location{ + binA, + }, + }, + { + name: "dont de-dup different filesystem", + input: []Location{ + binB, binA, + }, + expected: []Location{ + binA, binB, + }, + }, + { + name: "dont de-dup different virtual paths", + input: []Location{ + etcHostsLinkVar, etcHostsLinkHome, + }, + expected: []Location{ + etcHostsLinkHome, etcHostsLinkVar, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + set := NewLocationSet(test.input...) + assert.Equal(t, test.expected, set.ToSlice()) + }) + } +} + +func TestLocationSet_Hash(t *testing.T) { + etcAlink := Location{ + Coordinates: Coordinates{ + RealPath: "/etc/hosts", + FileSystemID: "a", + }, + VirtualPath: "/var/etc/hosts", + } + + etcA := Location{ + Coordinates: Coordinates{ + RealPath: "/etc/hosts", + FileSystemID: "a", + }, + } + + etcB := Location{ + Coordinates: Coordinates{ + RealPath: "/etc/hosts", + FileSystemID: "b", + }, + } + + binA := Location{ + Coordinates: Coordinates{ + RealPath: "/bin", + FileSystemID: "a", + }, + VirtualPath: "/usr/bin", + } + + binB := Location{ + Coordinates: Coordinates{ + RealPath: "/bin", + FileSystemID: "b", + }, + VirtualPath: "/usr/bin", + } + + tests := []struct { + name string + setA LocationSet + setB LocationSet + want assert.ComparisonAssertionFunc + }{ + { + name: "empty sets have the same hash", + setA: NewLocationSet(), + setB: NewLocationSet(), + want: assert.Equal, + }, + { + name: "sets with same elements accessed through different paths have the same hash", + setA: NewLocationSet(binA, etcA), + setB: NewLocationSet(etcAlink, binA), + want: assert.Equal, + }, + { + name: "sets with same elements have the same hash", + setA: NewLocationSet(binA, etcA), + setB: NewLocationSet(etcA, binA), + want: assert.Equal, + }, + { + name: "sets with different element counts have different hashes", + setA: NewLocationSet(binA, etcA), + setB: NewLocationSet(binA), + want: assert.NotEqual, + }, + { + name: "sets with same path but different FS IDs have the same hash", + setA: NewLocationSet(binA), + setB: NewLocationSet(binB), + want: assert.Equal, + }, + { + name: "sets with same paths but different FS IDs have the same hash", + setA: NewLocationSet(etcA, binA), + setB: NewLocationSet(binB, etcB), + want: assert.Equal, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotA, err := artifact.IDByHash(tt.setA) + require.NoError(t, err) + gotB, err := artifact.IDByHash(tt.setB) + require.NoError(t, err) + tt.want(t, gotA, gotB) + }) + } +} diff --git a/syft/source/locations.go b/syft/source/locations.go new file mode 100644 index 000000000..045d1ed9d --- /dev/null +++ b/syft/source/locations.go @@ -0,0 +1,21 @@ +package source + +type Locations []Location + +func (l Locations) Len() int { + return len(l) +} + +func (l Locations) Less(i, j int) bool { + if l[i].RealPath == l[j].RealPath { + if l[i].VirtualPath == l[j].VirtualPath { + return l[i].FileSystemID < l[j].FileSystemID + } + return l[i].VirtualPath < l[j].VirtualPath + } + return l[i].RealPath < l[j].RealPath +} + +func (l Locations) Swap(i, j int) { + l[i], l[j] = l[j], l[i] +} diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index f35788e0d..898727af0 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -8,6 +8,7 @@ import ( ) func TestPackagesCmdFlags(t *testing.T) { + hiddenPackagesImage := "docker-archive:" + getFixtureImage(t, "image-hidden-packages") coverageImage := "docker-archive:" + getFixtureImage(t, "image-pkg-coverage") //badBinariesImage := "docker-archive:" + getFixtureImage(t, "image-bad-binaries") tmp := t.TempDir() + "/" @@ -100,21 +101,34 @@ func TestPackagesCmdFlags(t *testing.T) { }, }, { - name: "all-layers-scope-flag", - args: []string{"packages", "-o", "json", "-s", "all-layers", coverageImage}, + name: "squashed-scope-flag-hidden-packages", + args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage}, assertions: []traitAssertion{ - assertPackageCount(22), + assertPackageCount(162), + assertNotInOutput("vsftpd"), // hidden package + assertSuccessfulReturnCode, + }, + }, + { + name: "all-layers-scope-flag", + args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage}, + assertions: []traitAssertion{ + assertPackageCount(163), // packages are now deduplicated for this case + assertInOutput("all-layers"), + assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, }, }, { name: "all-layers-scope-flag-by-env", - args: []string{"packages", "-o", "json", coverageImage}, + args: []string{"packages", "-o", "json", hiddenPackagesImage}, env: map[string]string{ "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", }, assertions: []traitAssertion{ - assertPackageCount(22), + assertPackageCount(163), // packages are now deduplicated for this case + assertInOutput("all-layers"), + assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, }, }, diff --git a/test/cli/test-fixtures/image-hidden-packages/Dockerfile b/test/cli/test-fixtures/image-hidden-packages/Dockerfile new file mode 100644 index 000000000..cf8ea3bfa --- /dev/null +++ b/test/cli/test-fixtures/image-hidden-packages/Dockerfile @@ -0,0 +1,4 @@ +FROM centos:7.9.2009 +# all-layers scope should pickup on vsftpd +RUN yum install -y vsftpd +RUN yum remove -y vsftpd \ No newline at end of file diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index ade886512..6c701a72e 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -54,7 +54,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { } func TestPkgCoverageImage(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-pkg-coverage") + sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope) observedLanguages := internal.NewStringSet() definedLanguages := internal.NewStringSet() diff --git a/test/integration/distro_test.go b/test/integration/distro_test.go index 8d1dbfacc..5c9f044f0 100644 --- a/test/integration/distro_test.go +++ b/test/integration/distro_test.go @@ -1,6 +1,7 @@ package integration import ( + "github.com/anchore/syft/syft/source" "testing" "github.com/stretchr/testify/assert" @@ -9,7 +10,7 @@ import ( ) func TestDistroImage(t *testing.T) { - sbom, _ := catalogFixtureImage(t, "image-distro-id") + sbom, _ := catalogFixtureImage(t, "image-distro-id", source.SquashedScope) expected := &linux.Release{ PrettyName: "BusyBox v1.31.1", diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 920dc2d7d..348a309d2 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -2,12 +2,14 @@ package integration import ( "bytes" - "regexp" - "testing" - + "fmt" "github.com/anchore/syft/internal/formats/cyclonedxjson" "github.com/anchore/syft/internal/formats/cyclonedxxml" "github.com/anchore/syft/internal/formats/syftjson" + "github.com/anchore/syft/syft/source" + "regexp" + "testing" + "github.com/anchore/syft/syft/sbom" "github.com/stretchr/testify/require" @@ -51,12 +53,12 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { }, }, } - for _, test := range tests { - t.Run(string(test.formatOption), func(t *testing.T) { - // use second image for relationships - for _, image := range []string{"image-pkg-coverage", "image-owning-package"} { - originalSBOM, _ := catalogFixtureImage(t, image) + for _, test := range tests { + // use second image for relationships + for _, image := range []string{"image-pkg-coverage", "image-owning-package"} { + t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) { + originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope) format := syft.FormatByID(test.formatOption) require.NotNil(t, format) @@ -87,7 +89,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) } } - } - }) + }) + } } } diff --git a/test/integration/node_packages_test.go b/test/integration/node_packages_test.go index 8505e5c78..2ececa658 100644 --- a/test/integration/node_packages_test.go +++ b/test/integration/node_packages_test.go @@ -14,7 +14,7 @@ func TestNpmPackageLockDirectory(t *testing.T) { foundPackages := internal.NewStringSet() for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { - for _, actualLocation := range actualPkg.Locations { + for _, actualLocation := range actualPkg.Locations.ToSlice() { if strings.Contains(actualLocation.RealPath, "node_modules") { t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation) } @@ -35,7 +35,7 @@ func TestYarnPackageLockDirectory(t *testing.T) { foundPackages := internal.NewStringSet() for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { - for _, actualLocation := range actualPkg.Locations { + for _, actualLocation := range actualPkg.Locations.ToSlice() { if strings.Contains(actualLocation.RealPath, "node_modules") { t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation) } diff --git a/test/integration/package_deduplication_test.go b/test/integration/package_deduplication_test.go new file mode 100644 index 000000000..8798f9d90 --- /dev/null +++ b/test/integration/package_deduplication_test.go @@ -0,0 +1,84 @@ +package integration + +import ( + "fmt" + "github.com/anchore/syft/syft/source" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "testing" +) + +func TestPackageDeduplication(t *testing.T) { + tests := []struct { + scope source.Scope + packageCount int + instanceCount map[string]int + locationCount map[string]int + }{ + { + scope: source.AllLayersScope, + packageCount: 172, // without deduplication this would be 618 + instanceCount: map[string]int{ + "basesystem": 1, + "wget": 1, + "curl": 2, // upgraded in the image + "vsftpd": 1, + "httpd": 1, + }, + locationCount: map[string]int{ + "basesystem-10.0-7.el7.centos": 4, + "curl-7.29.0-59.el7": 1, // from base image + "curl-7.29.0-59.el7_9.1": 3, // upgrade + "wget-1.14-18.el7_6.1": 3, + "vsftpd-3.0.2-29.el7_9": 2, + "httpd-2.4.6-97.el7.centos.5": 1, + }, + }, + { + scope: source.SquashedScope, + packageCount: 170, + instanceCount: map[string]int{ + "basesystem": 1, + "wget": 1, + "curl": 1, // upgraded, but the most recent + "vsftpd": 1, + "httpd": 1, + }, + locationCount: map[string]int{ + "basesystem-10.0-7.el7.centos": 1, + "curl-7.29.0-59.el7_9.1": 1, // upgrade + "wget-1.14-18.el7_6.1": 1, + "vsftpd-3.0.2-29.el7_9": 1, + "httpd-2.4.6-97.el7.centos.5": 1, + }, + }, + } + + for _, tt := range tests { + t.Run(string(tt.scope), func(t *testing.T) { + sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope) + + assert.Equal(t, tt.packageCount, sbom.Artifacts.PackageCatalog.PackageCount()) + for name, expectedInstanceCount := range tt.instanceCount { + pkgs := sbom.Artifacts.PackageCatalog.PackagesByName(name) + + // with multiple packages with the same name, something is wrong (or this is the wrong fixture) + require.Len(t, pkgs, expectedInstanceCount) + for _, p := range pkgs { + nameVersion := fmt.Sprintf("%s-%s", name, p.Version) + expectedLocationCount, ok := tt.locationCount[nameVersion] + if !ok { + t.Fatalf("missing name-version: %s", nameVersion) + } + + // we should see merged locations (assumption, there was 1 location for each package) + assert.Len(t, p.Locations.ToSlice(), expectedLocationCount) + + // all paths should match + assert.Len(t, p.Locations.CoordinateSet().Paths(), 1) + } + } + + }) + } +} diff --git a/test/integration/package_ownership_relationship_test.go b/test/integration/package_ownership_relationship_test.go index 587b15459..8335898b9 100644 --- a/test/integration/package_ownership_relationship_test.go +++ b/test/integration/package_ownership_relationship_test.go @@ -3,6 +3,7 @@ package integration import ( "bytes" "encoding/json" + "github.com/anchore/syft/syft/source" "testing" "github.com/anchore/syft/internal/formats/syftjson" @@ -22,7 +23,7 @@ func TestPackageOwnershipRelationships(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - sbom, _ := catalogFixtureImage(t, test.fixture) + sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope) output := bytes.NewBufferString("") err := syftjson.Format().Encode(output, sbom) diff --git a/test/integration/regression_apk_scanner_buffer_size_test.go b/test/integration/regression_apk_scanner_buffer_size_test.go index 19bf9f92f..d0e4d953e 100644 --- a/test/integration/regression_apk_scanner_buffer_size_test.go +++ b/test/integration/regression_apk_scanner_buffer_size_test.go @@ -1,6 +1,7 @@ package integration import ( + "github.com/anchore/syft/syft/source" "testing" "github.com/anchore/syft/syft/pkg" @@ -9,7 +10,7 @@ import ( func TestRegression212ApkBufferSize(t *testing.T) { // This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could // not be processed due to a scanner buffer that was too small - sbom, _ := catalogFixtureImage(t, "image-large-apk-data") + sbom, _ := catalogFixtureImage(t, "image-large-apk-data", source.SquashedScope) expectedPkgs := 58 actualPkgs := 0 diff --git a/test/integration/regression_go_bin_scanner_arch_test.go b/test/integration/regression_go_bin_scanner_arch_test.go index 5b364d8f9..295dd4f10 100644 --- a/test/integration/regression_go_bin_scanner_arch_test.go +++ b/test/integration/regression_go_bin_scanner_arch_test.go @@ -1,6 +1,7 @@ package integration import ( + "github.com/anchore/syft/syft/source" "strings" "testing" @@ -15,12 +16,12 @@ func TestRegressionGoArchDiscovery(t *testing.T) { ) // This is a regression test to make sure the way we detect go binary packages // stays consistent and reproducible as the tool chain evolves - sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage") + sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage", source.SquashedScope) var actualELF, actualWIN, actualMACOS int for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) { - for _, l := range p.Locations { + for _, l := range p.Locations.ToSlice() { switch { case strings.Contains(l.RealPath, "elf"): actualELF++ diff --git a/test/integration/regression_java_no_main_package_test.go b/test/integration/regression_java_no_main_package_test.go index a5f41ddaf..417002dc9 100644 --- a/test/integration/regression_java_no_main_package_test.go +++ b/test/integration/regression_java_no_main_package_test.go @@ -1,9 +1,10 @@ package integration import ( + "github.com/anchore/syft/syft/source" "testing" ) func TestRegressionJavaNoMainPackage(t *testing.T) { // Regression: https://github.com/anchore/syft/issues/252 - catalogFixtureImage(t, "image-java-no-main-package") + catalogFixtureImage(t, "image-java-no-main-package", source.SquashedScope) } diff --git a/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile b/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile new file mode 100644 index 000000000..ebe48155d --- /dev/null +++ b/test/integration/test-fixtures/image-vertical-package-dups/Dockerfile @@ -0,0 +1,6 @@ +FROM centos:7.9.2009 +# modifying the RPM DB multiple times will result in duplicate packages when using all-layers (if there was no de-dup logic) +# curl is tricky, it already exists in the image and is being upgraded +RUN yum install -y wget curl +RUN yum install -y vsftpd +RUN yum install -y httpd \ No newline at end of file diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index fd33eba82..f05bb100a 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -13,7 +13,7 @@ import ( "github.com/anchore/syft/syft/source" ) -func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *source.Source) { +func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope) (sbom.SBOM, *source.Source) { imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName) userInput := "docker-archive:" + tarPath @@ -25,7 +25,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou // TODO: this would be better with functional options (after/during API refactor) c := cataloger.DefaultConfig() - c.Search.Scope = source.SquashedScope + c.Search.Scope = scope pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) if err != nil { t.Fatalf("failed to catalog image: %+v", err)