Deduplicate packages across multiple container image layers (#930)

This commit is contained in:
Alex Goodman 2022-03-31 15:45:51 -04:00 committed by GitHub
parent cb3e73e308
commit f24bbc1838
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
64 changed files with 1368 additions and 381 deletions

View File

@ -56,14 +56,14 @@ func sbomFixture() sbom.SBOM {
Name: "name", Name: "name",
Version: "version", Version: "version",
FoundBy: "foundBy", FoundBy: "foundBy",
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "path", RealPath: "path",
FileSystemID: "layerID", FileSystemID: "layerID",
}, },
}, },
}, ),
Licenses: []string{"license"}, Licenses: []string{"license"},
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,

View File

@ -13,8 +13,9 @@ import (
func encodeComponent(p pkg.Package) cyclonedx.Component { func encodeComponent(p pkg.Package) cyclonedx.Component {
props := encodeProperties(p, "syft:package") props := encodeProperties(p, "syft:package")
props = append(props, encodeCPEs(p)...) props = append(props, encodeCPEs(p)...)
if len(p.Locations) > 0 { locations := p.Locations.ToSlice()
props = append(props, encodeProperties(p.Locations, "syft:location")...) if len(locations) > 0 {
props = append(props, encodeProperties(locations, "syft:location")...)
} }
if hasMetadata(p) { if hasMetadata(p) {
props = append(props, encodeProperties(p.Metadata, "syft:metadata")...) props = append(props, encodeProperties(p.Metadata, "syft:metadata")...)
@ -73,10 +74,13 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package {
return p return p
} }
func decodeLocations(vals map[string]string) []source.Location { func decodeLocations(vals map[string]string) source.LocationSet {
v := common.Decode(reflect.TypeOf([]source.Location{}), vals, "syft:location", CycloneDXFields) v := common.Decode(reflect.TypeOf([]source.Location{}), vals, "syft:location", CycloneDXFields)
out, _ := v.([]source.Location) out, ok := v.([]source.Location)
return out if !ok {
out = nil
}
return source.NewLocationSet(out...)
} }
func decodePackageMetadata(vals map[string]string, c *cyclonedx.Component, typ pkg.MetadataType) interface{} { func decodePackageMetadata(vals map[string]string, c *cyclonedx.Component, typ pkg.MetadataType) interface{} {

View File

@ -26,9 +26,9 @@ func Test_encodeComponentProperties(t *testing.T) {
name: "from apk", name: "from apk",
input: pkg.Package{ input: pkg.Package{
FoundBy: "cataloger", FoundBy: "cataloger",
Locations: []source.Location{ Locations: source.NewLocationSet(
{Coordinates: source.Coordinates{RealPath: "test"}}, source.Location{Coordinates: source.Coordinates{RealPath: "test"}},
}, ),
Metadata: pkg.ApkMetadata{ Metadata: pkg.ApkMetadata{
Package: "libc-utils", Package: "libc-utils",
OriginPackage: "libc-dev", OriginPackage: "libc-dev",

View File

@ -35,7 +35,7 @@ func SourceInfo(p pkg.Package) string {
answer = "acquired package info from the following paths" answer = "acquired package info from the following paths"
} }
var paths []string var paths []string
for _, l := range p.Locations { for _, l := range p.Locations.ToSlice() {
paths = append(paths, l.RealPath) paths = append(paths, l.RealPath)
} }

View File

@ -18,10 +18,10 @@ func Test_SourceInfo(t *testing.T) {
name: "locations are captured", name: "locations are captured",
input: pkg.Package{ input: pkg.Package{
// note: no type given // note: no type given
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/a-place", "/b-place"), source.NewVirtualLocation("/a-place", "/b-place"),
source.NewVirtualLocation("/c-place", "/d-place"), source.NewVirtualLocation("/c-place", "/d-place"),
}, ),
}, },
expected: []string{ expected: []string{
"from the following paths", "from the following paths",

View File

@ -157,9 +157,9 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) {
catalog.Add(pkg.Package{ catalog.Add(pkg.Package{
Name: "package-1", Name: "package-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), source.NewLocationFromImage(string(ref1.RealPath), *ref1, img),
}, ),
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
FoundBy: "the-cataloger-1", FoundBy: "the-cataloger-1",
Language: pkg.Python, Language: pkg.Python,
@ -177,9 +177,9 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) {
catalog.Add(pkg.Package{ catalog.Add(pkg.Package{
Name: "package-2", Name: "package-2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), source.NewLocationFromImage(string(ref2.RealPath), *ref2, img),
}, ),
Type: pkg.DebPkg, Type: pkg.DebPkg,
FoundBy: "the-cataloger-2", FoundBy: "the-cataloger-2",
MetadataType: pkg.DpkgMetadataType, MetadataType: pkg.DpkgMetadataType,
@ -234,9 +234,9 @@ func newDirectoryCatalog() *pkg.Catalog {
Version: "1.0.1", Version: "1.0.1",
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
FoundBy: "the-cataloger-1", FoundBy: "the-cataloger-1",
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewLocation("/some/path/pkg1"), source.NewLocation("/some/path/pkg1"),
}, ),
Language: pkg.Python, Language: pkg.Python,
MetadataType: pkg.PythonPackageMetadataType, MetadataType: pkg.PythonPackageMetadataType,
Licenses: []string{"MIT"}, Licenses: []string{"MIT"},
@ -259,9 +259,9 @@ func newDirectoryCatalog() *pkg.Catalog {
Version: "2.0.1", Version: "2.0.1",
Type: pkg.DebPkg, Type: pkg.DebPkg,
FoundBy: "the-cataloger-2", FoundBy: "the-cataloger-2",
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewLocation("/some/path/pkg1"), source.NewLocation("/some/path/pkg1"),
}, ),
MetadataType: pkg.DpkgMetadataType, MetadataType: pkg.DpkgMetadataType,
Metadata: pkg.DpkgMetadata{ Metadata: pkg.DpkgMetadata{
Package: "package-2", Package: "package-2",

View File

@ -58,8 +58,9 @@ func toSnapshotMetadata(s *sbom.SBOM) Metadata {
} }
func filesystem(p pkg.Package) string { func filesystem(p pkg.Package) string {
if len(p.Locations) > 0 { locations := p.Locations.ToSlice()
return p.Locations[0].FileSystemID if len(locations) > 0 {
return locations[0].FileSystemID
} }
return "" return ""
} }
@ -76,8 +77,9 @@ func toPath(s source.Metadata, p pkg.Package) string {
if inputPath == "." { if inputPath == "." {
inputPath = "" inputPath = ""
} }
if len(p.Locations) > 0 { locations := p.Locations.ToSlice()
location := p.Locations[0] if len(locations) > 0 {
location := locations[0]
packagePath := location.RealPath packagePath := location.RealPath
if location.VirtualPath != "" { if location.VirtualPath != "" {
packagePath = location.VirtualPath packagePath = location.VirtualPath

View File

@ -35,32 +35,38 @@ func Test_toGithubModel(t *testing.T) {
{ {
Name: "pkg-1", Name: "pkg-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{{ Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/usr/lib", RealPath: "/usr/lib",
FileSystemID: "fsid-1", FileSystemID: "fsid-1",
}, },
}}, },
),
}, },
{ {
Name: "pkg-2", Name: "pkg-2",
Version: "2.0.2", Version: "2.0.2",
Locations: []source.Location{{ Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/usr/lib", RealPath: "/usr/lib",
FileSystemID: "fsid-1", FileSystemID: "fsid-1",
}, },
}}, },
),
}, },
{ {
Name: "pkg-3", Name: "pkg-3",
Version: "3.0.3", Version: "3.0.3",
Locations: []source.Location{{ Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/etc", RealPath: "/etc",
FileSystemID: "fsid-1", FileSystemID: "fsid-1",
}, },
}}, },
),
}, },
} { } {
p.PURL = packageurl.NewPackageURL( p.PURL = packageurl.NewPackageURL(

View File

@ -3,18 +3,18 @@
"name": "/some/path", "name": "/some/path",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-12-20T19:12:47.869816Z", "created": "2022-03-30T21:48:28.297464Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
], ],
"licenseListVersion": "3.15" "licenseListVersion": "3.16"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https://anchore.com/syft/dir/some/path-4b896ded-7852-4e31-b764-136b53bdf346", "documentNamespace": "https://anchore.com/syft/dir/some/path-e188d59b-76f6-4c7f-a9f2-1ae7d0577781",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-1d97af55efe9512f", "SPDXID": "SPDXRef-b85dbb4e6ece5082",
"name": "package-1", "name": "package-1",
"licenseConcluded": "MIT", "licenseConcluded": "MIT",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",
@ -36,7 +36,7 @@
"versionInfo": "1.0.1" "versionInfo": "1.0.1"
}, },
{ {
"SPDXID": "SPDXRef-ad3d1c4abd84bf75", "SPDXID": "SPDXRef-ceda99598967ae8d",
"name": "package-2", "name": "package-2",
"licenseConcluded": "NONE", "licenseConcluded": "NONE",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",

View File

@ -3,18 +3,18 @@
"name": "user-image-input", "name": "user-image-input",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-12-20T19:13:07.647486Z", "created": "2022-03-30T21:48:28.303986Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
], ],
"licenseListVersion": "3.15" "licenseListVersion": "3.16"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https://anchore.com/syft/image/user-image-input-174da656-1824-4bd3-8604-28919f8a65bc", "documentNamespace": "https://anchore.com/syft/image/user-image-input-9e4f4190-c5ae-4e31-a852-d1ab71357516",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-d16127444133b5c1", "SPDXID": "SPDXRef-2a46171f91c8d4bc",
"name": "package-1", "name": "package-1",
"licenseConcluded": "MIT", "licenseConcluded": "MIT",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",
@ -36,7 +36,7 @@
"versionInfo": "1.0.1" "versionInfo": "1.0.1"
}, },
{ {
"SPDXID": "SPDXRef-24907357f3705420", "SPDXID": "SPDXRef-ae77680e9b1d087e",
"name": "package-2", "name": "package-2",
"licenseConcluded": "NONE", "licenseConcluded": "NONE",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",

View File

@ -2,16 +2,16 @@ SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0 DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT SPDXID: SPDXRef-DOCUMENT
DocumentName: /some/path DocumentName: /some/path
DocumentNamespace: https://anchore.com/syft/dir/some/path-4b90f56d-d596-4ad8-b6a5-17f7d801350d DocumentNamespace: https://anchore.com/syft/dir/some/path-71aa3553-1a73-405f-9f1f-6347d6d4593b
LicenseListVersion: 3.16 LicenseListVersion: 3.16
Creator: Organization: Anchore, Inc Creator: Organization: Anchore, Inc
Creator: Tool: syft-[not provided] Creator: Tool: syft-[not provided]
Created: 2022-02-10T21:09:27Z Created: 2022-03-30T21:48:22Z
##### Package: package-2 ##### Package: package-2
PackageName: package-2 PackageName: package-2
SPDXID: SPDXRef-Package-deb-package-2-ad3d1c4abd84bf75 SPDXID: SPDXRef-Package-deb-package-2-ceda99598967ae8d
PackageVersion: 2.0.1 PackageVersion: 2.0.1
PackageDownloadLocation: NOASSERTION PackageDownloadLocation: NOASSERTION
FilesAnalyzed: false FilesAnalyzed: false
@ -24,7 +24,7 @@ ExternalRef: PACKAGE_MANAGER purl a-purl-2
##### Package: package-1 ##### Package: package-1
PackageName: package-1 PackageName: package-1
SPDXID: SPDXRef-Package-python-package-1-1d97af55efe9512f SPDXID: SPDXRef-Package-python-package-1-b85dbb4e6ece5082
PackageVersion: 1.0.1 PackageVersion: 1.0.1
PackageDownloadLocation: NOASSERTION PackageDownloadLocation: NOASSERTION
FilesAnalyzed: false FilesAnalyzed: false

View File

@ -2,16 +2,16 @@ SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0 DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT SPDXID: SPDXRef-DOCUMENT
DocumentName: user-image-input DocumentName: user-image-input
DocumentNamespace: https://anchore.com/syft/image/user-image-input-26a2def6-53d0-4504-b99a-a046832508ac DocumentNamespace: https://anchore.com/syft/image/user-image-input-e46e20f4-43a4-40e7-9f82-fd55b8a89e5f
LicenseListVersion: 3.16 LicenseListVersion: 3.16
Creator: Organization: Anchore, Inc Creator: Organization: Anchore, Inc
Creator: Tool: syft-[not provided] Creator: Tool: syft-[not provided]
Created: 2022-02-10T21:09:27Z Created: 2022-03-30T21:48:22Z
##### Package: package-2 ##### Package: package-2
PackageName: package-2 PackageName: package-2
SPDXID: SPDXRef-Package-deb-package-2-73f796c846875b9e SPDXID: SPDXRef-Package-deb-package-2-ae77680e9b1d087e
PackageVersion: 2.0.1 PackageVersion: 2.0.1
PackageDownloadLocation: NOASSERTION PackageDownloadLocation: NOASSERTION
FilesAnalyzed: false FilesAnalyzed: false
@ -24,7 +24,7 @@ ExternalRef: PACKAGE_MANAGER purl a-purl-2
##### Package: package-1 ##### Package: package-1
PackageName: package-1 PackageName: package-1
SPDXID: SPDXRef-Package-python-package-1-d9527e708c11f8b9 SPDXID: SPDXRef-Package-python-package-1-2a46171f91c8d4bc
PackageVersion: 1.0.1 PackageVersion: 1.0.1
PackageDownloadLocation: NOASSERTION PackageDownloadLocation: NOASSERTION
FilesAnalyzed: false FilesAnalyzed: false

View File

@ -42,13 +42,13 @@ func TestEncodeFullJSONDocument(t *testing.T) {
p1 := pkg.Package{ p1 := pkg.Package{
Name: "package-1", Name: "package-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a/place/a", RealPath: "/a/place/a",
}, },
}, },
}, ),
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
FoundBy: "the-cataloger-1", FoundBy: "the-cataloger-1",
Language: pkg.Python, Language: pkg.Python,
@ -68,13 +68,13 @@ func TestEncodeFullJSONDocument(t *testing.T) {
p2 := pkg.Package{ p2 := pkg.Package{
Name: "package-2", Name: "package-2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/b/place/b", RealPath: "/b/place/b",
}, },
}, },
}, ),
Type: pkg.DebPkg, Type: pkg.DebPkg,
FoundBy: "the-cataloger-2", FoundBy: "the-cataloger-2",
MetadataType: pkg.DpkgMetadataType, MetadataType: pkg.DpkgMetadataType,

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "1d97af55efe9512f", "id": "b85dbb4e6ece5082",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -36,7 +36,7 @@
} }
}, },
{ {
"id": "ad3d1c4abd84bf75", "id": "ceda99598967ae8d",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "d9a7c58726ab4bef", "id": "b3fa3ee64756b0c6",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -31,7 +31,7 @@
} }
}, },
{ {
"id": "ac462e450060da2c", "id": "b324f4d9ee5413fe",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "d9527e708c11f8b9", "id": "2a46171f91c8d4bc",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -32,7 +32,7 @@
} }
}, },
{ {
"id": "73f796c846875b9e", "id": "ae77680e9b1d087e",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -175,8 +175,9 @@ func toPackageModel(p pkg.Package) model.Package {
licenses = p.Licenses licenses = p.Licenses
} }
var coordinates = make([]source.Coordinates, len(p.Locations)) locations := p.Locations.ToSlice()
for i, l := range p.Locations { var coordinates = make([]source.Coordinates, len(locations))
for i, l := range locations {
coordinates[i] = l.Coordinates coordinates[i] = l.Coordinates
} }

View File

@ -53,7 +53,8 @@ func toSyftRelationships(doc *model.Document, catalog *pkg.Catalog, relationship
for _, p := range catalog.Sorted() { for _, p := range catalog.Sorted() {
idMap[string(p.ID())] = p idMap[string(p.ID())] = p
for _, l := range p.Locations { locations := p.Locations.ToSlice()
for _, l := range locations {
idMap[string(l.Coordinates.ID())] = l.Coordinates idMap[string(l.Coordinates.ID())] = l.Coordinates
} }
} }
@ -166,7 +167,7 @@ func toSyftPackage(p model.Package, idAliases map[string]string) pkg.Package {
Name: p.Name, Name: p.Name,
Version: p.Version, Version: p.Version,
FoundBy: p.FoundBy, FoundBy: p.FoundBy,
Locations: locations, Locations: source.NewLocationSet(locations...),
Licenses: p.Licenses, Licenses: p.Licenses,
Language: p.Language, Language: p.Language,
Type: p.Type, Type: p.Type,

View File

@ -10,11 +10,28 @@ import (
"github.com/jinzhu/copier" "github.com/jinzhu/copier"
) )
// orderedIDSet is a collection of unique artifact IDs that remembers the
// order in which the IDs were first added.
type orderedIDSet struct {
	slice []artifact.ID
}

// add appends every given ID that is not already held by the set. IDs that
// are already present (including repeats within the same call) are ignored,
// so the relative order of first insertion is preserved.
func (s *orderedIDSet) add(ids ...artifact.ID) {
	for _, candidate := range ids {
		alreadyPresent := false
		for i := range s.slice {
			if s.slice[i] == candidate {
				alreadyPresent = true
				break
			}
		}
		if !alreadyPresent {
			s.slice = append(s.slice, candidate)
		}
	}
}
// Catalog represents a collection of Packages. // Catalog represents a collection of Packages.
type Catalog struct { type Catalog struct {
byID map[artifact.ID]Package byID map[artifact.ID]Package
idsByType map[Type][]artifact.ID idsByName map[string]orderedIDSet
idsByPath map[string][]artifact.ID // note: this is real path or virtual path idsByType map[Type]orderedIDSet
idsByPath map[string]orderedIDSet // note: this is real path or virtual path
lock sync.RWMutex lock sync.RWMutex
} }
@ -22,8 +39,9 @@ type Catalog struct {
func NewCatalog(pkgs ...Package) *Catalog { func NewCatalog(pkgs ...Package) *Catalog {
catalog := Catalog{ catalog := Catalog{
byID: make(map[artifact.ID]Package), byID: make(map[artifact.ID]Package),
idsByType: make(map[Type][]artifact.ID), idsByName: make(map[string]orderedIDSet),
idsByPath: make(map[string][]artifact.ID), idsByType: make(map[Type]orderedIDSet),
idsByPath: make(map[string]orderedIDSet),
} }
for _, p := range pkgs { for _, p := range pkgs {
@ -55,7 +73,12 @@ func (c *Catalog) Package(id artifact.ID) *Package {
// PackagesByPath returns all packages that were discovered from the given path. // PackagesByPath returns all packages that were discovered from the given path.
func (c *Catalog) PackagesByPath(path string) []Package { func (c *Catalog) PackagesByPath(path string) []Package {
return c.Packages(c.idsByPath[path]) return c.Packages(c.idsByPath[path].slice)
}
// PackagesByName returns all packages that were discovered with a matching name.
func (c *Catalog) PackagesByName(name string) []Package {
return c.Packages(c.idsByName[name].slice)
} }
// Packages returns all packages for the given ID. // Packages returns all packages for the given ID.
@ -81,26 +104,58 @@ func (c *Catalog) Add(p Package) {
id = p.ID() id = p.ID()
} }
// store by package ID if existing, exists := c.byID[id]; exists {
c.byID[id] = p // there is already a package with this fingerprint merge the existing record with the new one
if err := existing.merge(p); err != nil {
log.Warnf("failed to merge packages: %+v", err)
} else {
c.addPathsToIndex(p)
}
return
}
// store by package type c.addToIndex(p)
c.idsByType[p.Type] = append(c.idsByType[p.Type], id) }
// store by file location paths func (c *Catalog) addToIndex(p Package) {
c.byID[p.id] = p
c.addNameToIndex(p)
c.addTypeToIndex(p)
c.addPathsToIndex(p)
}
// addNameToIndex records the package's ID under the package's name.
func (c *Catalog) addNameToIndex(p Package) {
	name := p.Name
	// map values are not addressable: copy the set out, update it, store it back
	ids := c.idsByName[name]
	ids.add(p.id)
	c.idsByName[name] = ids
}
// addTypeToIndex records the package's ID under the package's type.
func (c *Catalog) addTypeToIndex(p Package) {
	pkgType := p.Type
	// map values are not addressable: copy the set out, update it, store it back
	ids := c.idsByType[pkgType]
	ids.add(p.id)
	c.idsByType[pkgType] = ids
}
func (c *Catalog) addPathsToIndex(p Package) {
observedPaths := internal.NewStringSet() observedPaths := internal.NewStringSet()
for _, l := range p.Locations { for _, l := range p.Locations.ToSlice() {
if l.RealPath != "" && !observedPaths.Contains(l.RealPath) { if l.RealPath != "" && !observedPaths.Contains(l.RealPath) {
c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], id) c.addPathToIndex(p.id, l.RealPath)
observedPaths.Add(l.RealPath) observedPaths.Add(l.RealPath)
} }
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) { if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) {
c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], id) c.addPathToIndex(p.id, l.VirtualPath)
observedPaths.Add(l.VirtualPath) observedPaths.Add(l.VirtualPath)
} }
} }
} }
// addPathToIndex records the given ID under the given path in the path index.
func (c *Catalog) addPathToIndex(id artifact.ID, path string) {
	// map values are not addressable: copy the set out, update it, store it back
	ids := c.idsByPath[path]
	ids.add(id)
	c.idsByPath[path] = ids
}
// Enumerate all packages for the given type(s), enumerating all packages if no type is specified. // Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
func (c *Catalog) Enumerate(types ...Type) <-chan Package { func (c *Catalog) Enumerate(types ...Type) <-chan Package {
channel := make(chan Package) channel := make(chan Package)
@ -124,7 +179,7 @@ func (c *Catalog) Enumerate(types ...Type) <-chan Package {
continue continue
} }
} }
for _, id := range ids { for _, id := range ids.slice {
p := c.Package(id) p := c.Package(id)
if p != nil { if p != nil {
channel <- *p channel <- *p
@ -145,8 +200,10 @@ func (c *Catalog) Sorted(types ...Type) (pkgs []Package) {
sort.SliceStable(pkgs, func(i, j int) bool { sort.SliceStable(pkgs, func(i, j int) bool {
if pkgs[i].Name == pkgs[j].Name { if pkgs[i].Name == pkgs[j].Name {
if pkgs[i].Version == pkgs[j].Version { if pkgs[i].Version == pkgs[j].Version {
if pkgs[i].Type == pkgs[j].Type && len(pkgs[i].Locations) > 0 && len(pkgs[j].Locations) > 0 { iLocations := pkgs[i].Locations.ToSlice()
return pkgs[i].Locations[0].String() < pkgs[j].Locations[0].String() jLocations := pkgs[j].Locations.ToSlice()
if pkgs[i].Type == pkgs[j].Type && len(iLocations) > 0 && len(jLocations) > 0 {
return iLocations[0].String() < jLocations[0].String()
} }
return pkgs[i].Type < pkgs[j].Type return pkgs[i].Type < pkgs[j].Type
} }

View File

@ -3,11 +3,11 @@ package pkg
import ( import (
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/anchore/syft/syft/artifact"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
type expectedIndexes struct { type expectedIndexes struct {
@ -19,17 +19,17 @@ func TestCatalogAddPopulatesIndex(t *testing.T) {
var pkgs = []Package{ var pkgs = []Package{
{ {
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/a/path", "/another/path"), source.NewVirtualLocation("/a/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/bee/path"), source.NewVirtualLocation("/b/path", "/bee/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
}, },
{ {
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/c/path", "/another/path"),
source.NewVirtualLocation("/d/path", "/another/path"), source.NewVirtualLocation("/d/path", "/another/path"),
}, ),
Type: NpmPkg, Type: NpmPkg,
}, },
} }
@ -106,47 +106,169 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) { func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) {
p1 := Package{ p1 := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/b/path", "/another/path"), source.NewVirtualLocation("/b/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/b/path"), source.NewVirtualLocation("/b/path", "/b/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
Name: "Package-1", Name: "Package-1",
} }
p2 := Package{ p2 := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/b/path", "/b/path"), source.NewVirtualLocation("/b/path", "/b/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
Name: "Package-2", Name: "Package-2",
} }
p2Dup := Package{
Locations: source.NewLocationSet(
source.NewVirtualLocation("/b/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/c/path/b/dup"),
),
Type: RpmPkg,
Name: "Package-2",
}
tests := []struct { tests := []struct {
name string name string
pkg Package pkgs []Package
paths []string
}{ }{
{ {
name: "multiple locations with shared path", name: "multiple locations with shared path",
pkg: p1, pkgs: []Package{p1},
paths: []string{
"/b/path",
"/another/path",
},
}, },
{ {
name: "one location with shared path", name: "one location with shared path",
pkg: p2, pkgs: []Package{p2},
paths: []string{
"/b/path",
},
},
{
name: "two instances with similar locations",
pkgs: []Package{p2, p2Dup},
paths: []string{
"/b/path",
"/another/path",
"/c/path/b/dup", // this updated the path index on merge
},
}, },
} }
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
actual := NewCatalog(test.pkg).PackagesByPath("/b/path") for _, path := range test.paths {
if len(actual) != 1 { actualPackages := NewCatalog(test.pkgs...).PackagesByPath(path)
t.Errorf("expected exactly one package path, got %d", len(actual)) require.Len(t, actualPackages, 1)
} }
}) })
} }
} }
func TestCatalog_MergeRecords(t *testing.T) {
var tests = []struct {
name string
pkgs []Package
expectedLocations []source.Location
}{
{
name: "multiple Locations with shared path",
pkgs: []Package{
{
Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{
RealPath: "/b/path",
FileSystemID: "a",
},
VirtualPath: "/another/path",
},
),
Type: RpmPkg,
},
{
Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{
RealPath: "/b/path",
FileSystemID: "b",
},
VirtualPath: "/another/path",
},
),
Type: RpmPkg,
},
},
expectedLocations: []source.Location{
{
Coordinates: source.Coordinates{
RealPath: "/b/path",
FileSystemID: "a",
},
VirtualPath: "/another/path",
},
{
Coordinates: source.Coordinates{
RealPath: "/b/path",
FileSystemID: "b",
},
VirtualPath: "/another/path",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := NewCatalog(tt.pkgs...).PackagesByPath("/b/path")
require.Len(t, actual, 1)
assert.Equal(t, tt.expectedLocations, actual[0].Locations.ToSlice())
})
}
}
func TestCatalog_EnumerateNilCatalog(t *testing.T) { func TestCatalog_EnumerateNilCatalog(t *testing.T) {
var c *Catalog var c *Catalog
assert.Empty(t, c.Enumerate()) assert.Empty(t, c.Enumerate())
} }
// Test_idOrderedSet_add checks that orderedIDSet.add drops repeated IDs and
// keeps the remaining IDs in the order they were first supplied.
func Test_idOrderedSet_add(t *testing.T) {
	cases := []struct {
		name     string
		input    []artifact.ID
		expected []artifact.ID
	}{
		{
			name:     "elements deduplicated when added",
			input:    []artifact.ID{"1", "2", "3", "4", "1", "2", "3", "4", "1", "2", "3", "4"},
			expected: []artifact.ID{"1", "2", "3", "4"},
		},
		{
			name:     "elements retain ordering when added",
			input:    []artifact.ID{"4", "3", "2", "1"},
			expected: []artifact.ID{"4", "3", "2", "1"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// start from the zero value, which holds no IDs
			set := orderedIDSet{}
			set.add(tc.input...)
			assert.Equal(t, tc.expected, set.slice)
		})
	}
}

View File

@ -16,7 +16,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) {
}{ }{
{ {
name: "go case (filter out)", name: "go case (filter out)",
cpe: mustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Type: pkg.JenkinsPluginPkg, Type: pkg.JenkinsPluginPkg,
}, },
@ -24,7 +24,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) {
}, },
{ {
name: "ignore jenkins plugins with unique name", name: "ignore jenkins plugins with unique name",
cpe: mustCPE("cpe:2.3:a:name:ci-jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:name:ci-jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Type: pkg.JenkinsPluginPkg, Type: pkg.JenkinsPluginPkg,
}, },
@ -32,7 +32,7 @@ func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) {
}, },
{ {
name: "ignore java packages", name: "ignore java packages",
cpe: mustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:name:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
}, },
@ -55,7 +55,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
}{ }{
{ {
name: "filter out mismatched name (cloudbees vendor)", name: "filter out mismatched name (cloudbees vendor)",
cpe: mustCPE("cpe:2.3:a:cloudbees:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:cloudbees:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "not-j*nkins", Name: "not-j*nkins",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -64,7 +64,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
}, },
{ {
name: "filter out mismatched name (jenkins vendor)", name: "filter out mismatched name (jenkins vendor)",
cpe: mustCPE("cpe:2.3:a:jenkins:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:jenkins:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "not-j*nkins", Name: "not-j*nkins",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -73,7 +73,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
}, },
{ {
name: "filter out mismatched name (any vendor)", name: "filter out mismatched name (any vendor)",
cpe: mustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "not-j*nkins", Name: "not-j*nkins",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -82,7 +82,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
}, },
{ {
name: "ignore packages with the name jenkins", name: "ignore packages with the name jenkins",
cpe: mustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "jenkins-thing", Name: "jenkins-thing",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -91,7 +91,7 @@ func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
}, },
{ {
name: "ignore product names that are not exactly 'jenkins'", name: "ignore product names that are not exactly 'jenkins'",
cpe: mustCPE("cpe:2.3:a:*:jenkins-something-else:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jenkins-something-else:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "not-j*nkins", Name: "not-j*nkins",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -115,7 +115,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) {
}{ }{
{ {
name: "filter out mismatched name (atlassian vendor)", name: "filter out mismatched name (atlassian vendor)",
cpe: mustCPE("cpe:2.3:a:atlassian:jira:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:atlassian:jira:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "something-client", Name: "something-client",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -124,7 +124,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) {
}, },
{ {
name: "filter out mismatched name (jira vendor)", name: "filter out mismatched name (jira vendor)",
cpe: mustCPE("cpe:2.3:a:jira:jira:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:jira:jira:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "something-client", Name: "something-client",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -133,7 +133,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) {
}, },
{ {
name: "filter out mismatched name (any vendor)", name: "filter out mismatched name (any vendor)",
cpe: mustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "something-client", Name: "something-client",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -142,7 +142,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) {
}, },
{ {
name: "ignore package names that do not have 'client'", name: "ignore package names that do not have 'client'",
cpe: mustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "jira-thing", Name: "jira-thing",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
@ -151,7 +151,7 @@ func Test_disallowJiraClientServerMismatch(t *testing.T) {
}, },
{ {
name: "ignore product names that are not exactly 'jira'", name: "ignore product names that are not exactly 'jira'",
cpe: mustCPE("cpe:2.3:a:*:jira-something-else:3.2:*:*:*:*:*:*:*"), cpe: pkg.MustCPE("cpe:2.3:a:*:jira-something-else:3.2:*:*:*:*:*:*:*"),
pkg: pkg.Package{ pkg: pkg.Package{
Name: "not-j*ra", Name: "not-j*ra",
Type: pkg.JavaPkg, Type: pkg.JavaPkg,

View File

@ -55,7 +55,7 @@ func Generate(p pkg.Package) []pkg.CPE {
// filter out any known combinations that don't accurately represent this package // filter out any known combinations that don't accurately represent this package
cpes = filter(cpes, p, cpeFilters...) cpes = filter(cpes, p, cpeFilters...)
sort.Sort(BySpecificity(cpes)) sort.Sort(pkg.CPEBySpecificity(cpes))
return cpes return cpes
} }

View File

@ -58,9 +58,8 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
for _, p := range discoveredPackages { for _, p := range discoveredPackages {
p.FoundBy = c.upstreamCataloger p.FoundBy = c.upstreamCataloger
p.Locations = append(p.Locations, location) p.Locations.Add(location)
p.SetID() p.SetID()
packages = append(packages, *p) packages = append(packages, *p)
} }

View File

@ -53,7 +53,7 @@ func TestGenericCataloger(t *testing.T) {
assert.Len(t, actualPkgs, len(expectedPkgs)) assert.Len(t, actualPkgs, len(expectedPkgs))
for _, p := range actualPkgs { for _, p := range actualPkgs {
ref := p.Locations[0] ref := p.Locations.ToSlice()[0]
exP, ok := expectedPkgs[ref.RealPath] exP, ok := expectedPkgs[ref.RealPath]
if !ok { if !ok {
t.Errorf("missing expected pkg: ref=%+v", ref) t.Errorf("missing expected pkg: ref=%+v", ref)

View File

@ -60,7 +60,7 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti
for i := range pkgs { for i := range pkgs {
p := &pkgs[i] p := &pkgs[i]
p.FoundBy = c.Name() p.FoundBy = c.Name()
p.Locations = []source.Location{dbLocation} p.Locations.Add(dbLocation)
// the current entry only has what may have been listed in the status file, however, there are additional // the current entry only has what may have been listed in the status file, however, there are additional
// files that are listed in multiple other locations. We should retrieve them all and merge the file lists // files that are listed in multiple other locations. We should retrieve them all and merge the file lists
@ -88,7 +88,7 @@ func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pk
p.Licenses = parseLicensesFromCopyright(copyrightReader) p.Licenses = parseLicensesFromCopyright(copyrightReader)
// keep a record of the file where this was discovered // keep a record of the file where this was discovered
p.Locations = append(p.Locations, *copyrightLocation) p.Locations.Add(*copyrightLocation)
} }
} }
@ -117,7 +117,7 @@ loopNewFiles:
p.Metadata = metadata p.Metadata = metadata
// persist location information from each new source of information // persist location information from each new source of information
p.Locations = append(p.Locations, infoLocations...) p.Locations.Add(infoLocations...)
} }
func getAdditionalFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) ([]pkg.DpkgFileRecord, []source.Location) { func getAdditionalFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) ([]pkg.DpkgFileRecord, []source.Location) {

View File

@ -1,6 +1,7 @@
package deb package deb
import ( import (
"github.com/stretchr/testify/assert"
"testing" "testing"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
@ -115,15 +116,13 @@ func TestDpkgCataloger(t *testing.T) {
for idx := range actual { for idx := range actual {
a := &actual[idx] a := &actual[idx]
// we will test the sources separately // we will test the sources separately
var sourcesList = make([]string, len(a.Locations)) var sourcesList = make([]string, len(a.Locations.ToSlice()))
for i, s := range a.Locations { for i, s := range a.Locations.ToSlice() {
sourcesList[i] = s.RealPath sourcesList[i] = s.RealPath
} }
a.Locations = nil a.Locations = source.NewLocationSet()
for _, d := range deep.Equal(sourcesList, test.sources[a.Name]) { assert.ElementsMatch(t, sourcesList, test.sources[a.Name])
t.Errorf("diff: %+v", d)
}
} }
// test remaining fields... // test remaining fields...

View File

@ -49,9 +49,7 @@ func newGoBinaryPackage(dep *debug.Module, goVersion, architecture string, locat
Version: dep.Version, Version: dep.Version,
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(location),
location,
},
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goVersion, GoCompiledVersion: goVersion,

View File

@ -133,14 +133,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
FoundBy: catalogerName, FoundBy: catalogerName,
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}, },
}, },
}, ),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,
@ -190,14 +190,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
Version: "v0.2.1", Version: "v0.2.1",
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}, },
}, },
}, ),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,
@ -252,14 +252,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
Version: "v0.2.1", Version: "v0.2.1",
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}, },
}, },
}, ),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,
@ -273,14 +273,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
Version: "v0.0.0-20210222170800-9c70f9b80bcf", Version: "v0.0.0-20210222170800-9c70f9b80bcf",
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}, },
}, },
}, ),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,
@ -327,12 +327,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
Version: "v0.0.0-20211006194710-c8a6f5223071", Version: "v0.0.0-20211006194710-c8a6f5223071",
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}}}, },
},
),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,
@ -344,14 +346,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
Version: "v0.0.0-20210916214954-140adaaadfaf", Version: "v0.0.0-20210916214954-140adaaadfaf",
Language: pkg.Go, Language: pkg.Go,
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Locations: []source.Location{ Locations: source.NewLocationSet(
{ source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",
FileSystemID: "layer-id", FileSystemID: "layer-id",
}, },
}, },
}, ),
MetadataType: pkg.GolangBinMetadataType, MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{ Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goCompiledVersion, GoCompiledVersion: goCompiledVersion,

View File

@ -80,7 +80,7 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad
Name: metadata.Name, Name: metadata.Name,
Version: metadata.Version, Version: metadata.Version,
FoundBy: c.Name(), FoundBy: c.Name(),
Locations: sources, Locations: source.NewLocationSet(sources...),
Licenses: licenses, Licenses: licenses,
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,

View File

@ -144,7 +144,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
test.expectedPackage.Locations = locations test.expectedPackage.Locations = source.NewLocationSet(locations...)
actual, _, err := NewPythonPackageCataloger().Catalog(resolver) actual, _, err := NewPythonPackageCataloger().Catalog(resolver)
if err != nil { if err != nil {

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"github.com/anchore/syft/syft/source"
"os" "os"
"testing" "testing"
@ -48,7 +49,14 @@ func TestParsePipFileLock(t *testing.T) {
t.Fatalf("failed to parse requirements: %+v", err) t.Fatalf("failed to parse requirements: %+v", err)
} }
if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
} }
} }

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"github.com/anchore/syft/syft/source"
"os" "os"
"testing" "testing"
@ -42,7 +43,14 @@ func TestParseRequirementsTxt(t *testing.T) {
t.Fatalf("failed to parse requirements: %+v", err) t.Fatalf("failed to parse requirements: %+v", err)
} }
if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
} }
} }

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"github.com/anchore/syft/syft/source"
"os" "os"
"testing" "testing"
@ -54,7 +55,14 @@ func TestParseSetup(t *testing.T) {
t.Fatalf("failed to parse requirements: %+v", err) t.Fatalf("failed to parse requirements: %+v", err)
} }
if diff := cmp.Diff(expected, actual, cmp.AllowUnexported(pkg.Package{})); diff != "" { if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
} }
} }

View File

@ -63,7 +63,7 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re
p := pkg.Package{ p := pkg.Package{
Name: entry.Name, Name: entry.Name,
Version: toELVersion(metadata), Version: toELVersion(metadata),
Locations: []source.Location{dbLocation}, Locations: source.NewLocationSet(dbLocation),
FoundBy: catalogerName, FoundBy: catalogerName,
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,

View File

@ -71,7 +71,7 @@ func TestParseRpmDB(t *testing.T) {
"dive": { "dive": {
Name: "dive", Name: "dive",
Version: "0.9.2-1", Version: "0.9.2-1",
Locations: []source.Location{dbLocation}, Locations: source.NewLocationSet(dbLocation),
FoundBy: catalogerName, FoundBy: catalogerName,
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,
@ -98,7 +98,7 @@ func TestParseRpmDB(t *testing.T) {
"dive": { "dive": {
Name: "dive", Name: "dive",
Version: "0.9.2-1", Version: "0.9.2-1",
Locations: []source.Location{dbLocation}, Locations: source.NewLocationSet(dbLocation),
FoundBy: catalogerName, FoundBy: catalogerName,
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,

View File

@ -1,4 +1,4 @@
package cpe package pkg
import ( import (
"sort" "sort"
@ -6,15 +6,15 @@ import (
"github.com/facebookincubator/nvdtools/wfn" "github.com/facebookincubator/nvdtools/wfn"
) )
var _ sort.Interface = (*BySpecificity)(nil) var _ sort.Interface = (*CPEBySpecificity)(nil)
type BySpecificity []wfn.Attributes type CPEBySpecificity []wfn.Attributes
func (c BySpecificity) Len() int { return len(c) } func (c CPEBySpecificity) Len() int { return len(c) }
func (c BySpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c CPEBySpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c BySpecificity) Less(i, j int) bool { func (c CPEBySpecificity) Less(i, j int) bool {
iScore := weightedCountForSpecifiedFields(c[i]) iScore := weightedCountForSpecifiedFields(c[i])
jScore := weightedCountForSpecifiedFields(c[j]) jScore := weightedCountForSpecifiedFields(c[j])

View File

@ -1,40 +1,32 @@
package cpe package pkg
import ( import (
"sort" "sort"
"testing" "testing"
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func mustCPE(c string) pkg.CPE { func mustCPE(c string) CPE {
return must(pkg.NewCPE(c)) return must(NewCPE(c))
}
func must(c pkg.CPE, e error) pkg.CPE {
if e != nil {
panic(e)
}
return c
} }
func TestCPESpecificity(t *testing.T) { func TestCPESpecificity(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
input []pkg.CPE input []CPE
expected []pkg.CPE expected []CPE
}{ }{
{ {
name: "sort strictly by wfn *", name: "sort strictly by wfn *",
input: []pkg.CPE{ input: []CPE{
mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:*:package:1:*:*:*:*:some:*:*"),
mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"),
mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"),
}, },
expected: []pkg.CPE{ expected: []CPE{
mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:some:*:*"),
mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:some:package:*:*:*:*:*:*:*:*"),
@ -44,7 +36,7 @@ func TestCPESpecificity(t *testing.T) {
}, },
{ {
name: "sort strictly by field length", name: "sort strictly by field length",
input: []pkg.CPE{ input: []CPE{
mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:1:1:333:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:333:*:*:*:*:1:*:*"),
@ -52,7 +44,7 @@ func TestCPESpecificity(t *testing.T) {
mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"),
}, },
expected: []pkg.CPE{ expected: []CPE{
mustCPE("cpe:2.3:a:1:666666:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:666666:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:4444:*:*"),
@ -63,7 +55,7 @@ func TestCPESpecificity(t *testing.T) {
}, },
{ {
name: "sort by mix of field length and specificity", name: "sort by mix of field length and specificity",
input: []pkg.CPE{ input: []CPE{
mustCPE("cpe:2.3:a:1:666666:*:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:666666:*:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:*:1:1:*:*:*:*:4444:*:*"), mustCPE("cpe:2.3:a:*:1:1:*:*:*:*:4444:*:*"),
mustCPE("cpe:2.3:a:1:*:333:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:1:*:333:*:*:*:*:*:*:*"),
@ -71,7 +63,7 @@ func TestCPESpecificity(t *testing.T) {
mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"),
}, },
expected: []pkg.CPE{ expected: []CPE{
mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:55555:1:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:22:1:*:*:*:*:1:*:*"),
mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"), mustCPE("cpe:2.3:a:1:1:1:*:*:*:*:1:*:*"),
@ -82,7 +74,7 @@ func TestCPESpecificity(t *testing.T) {
}, },
{ {
name: "sort by mix of field length, specificity, dash", name: "sort by mix of field length, specificity, dash",
input: []pkg.CPE{ input: []CPE{
mustCPE("cpe:2.3:a:alpine:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine:alpine_keys:2.3-r1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:alpine_keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"),
@ -90,7 +82,7 @@ func TestCPESpecificity(t *testing.T) {
mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"),
}, },
expected: []pkg.CPE{ expected: []CPE{
mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine-keys:alpine_keys:2.3-r1:*:*:*:*:*:*:*"),
mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"), mustCPE("cpe:2.3:a:alpine_keys:alpine-keys:2.3-r1:*:*:*:*:*:*:*"),
@ -103,7 +95,7 @@ func TestCPESpecificity(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
sort.Sort(BySpecificity(test.input)) sort.Sort(CPEBySpecificity(test.input))
assert.Equal(t, test.expected, test.input) assert.Equal(t, test.expected, test.input)
}) })
} }

View File

@ -20,7 +20,7 @@ var jenkinsPluginPomPropertiesGroupIDs = []string{
// JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship. // JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship.
type JavaMetadata struct { type JavaMetadata struct {
VirtualPath string `json:"virtualPath"` VirtualPath string `json:"virtualPath" cyclonedx:"virtualPath"` // we need to include the virtual path in cyclonedx documents to prevent deduplication of jars within jars
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`

25
syft/pkg/merge_cpes.go Normal file
View File

@ -0,0 +1,25 @@
package pkg
import (
"sort"
)
// mergeCPEs combines two CPE lists into a single list ordered with CPEBySpecificity.
// Every entry of a is carried over unchanged; an entry of b is carried over only when
// no entry of a renders to the same BindToFmtString() value. Repeats within a, or
// within b, are not collapsed. When nothing is carried over the result is nil.
func mergeCPEs(a, b []CPE) (result []CPE) {
	// remember the formatted string of everything in a for membership checks
	seen := make(map[string]struct{}, len(a))
	for _, fromA := range a {
		seen[fromA.BindToFmtString()] = struct{}{}
	}
	result = append(result, a...)

	// only entries of b that are not already represented by a are added
	for _, fromB := range b {
		if _, dup := seen[fromB.BindToFmtString()]; dup {
			continue
		}
		result = append(result, fromB)
	}

	sort.Sort(CPEBySpecificity(result))
	return result
}

View File

@ -18,12 +18,12 @@ type Package struct {
Name string // the package name Name string // the package name
Version string // the version of the package Version string // the version of the package
FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package
Locations []source.Location // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) Locations source.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package)
Licenses []string // licenses discovered with the package metadata Licenses []string // licenses discovered with the package metadata
Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields)
PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) (note: this is NOT included in the definition of the ID since all fields on a pURL are derived from other fields) PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec)
MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field
Metadata interface{} // additional data found while parsing the package source Metadata interface{} // additional data found while parsing the package source
} }
@ -46,3 +46,21 @@ func (p Package) ID() artifact.ID {
func (p Package) String() string { func (p Package) String() string {
return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id) return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id)
} }
// merge folds the information carried by other into p.
//
// Both packages must have the same id; otherwise an error is returned and p is left
// untouched. On success the locations of other are added to p's location set, the
// two CPE lists are combined with mergeCPEs, and p takes other's pURL only when p
// has none of its own.
func (p *Package) merge(other Package) error {
	if p.id != other.id {
		return fmt.Errorf("cannot merge packages with different IDs: %q vs %q", p.id, other.id)
	}

	// a pURL mismatch is not fatal: warn, then keep p's value unless it is empty (handled below)
	if p.PURL != other.PURL {
		log.Warnf("merging packages with different pURLs: %q=%q vs %q=%q", p.id, p.PURL, other.id, other.PURL)
	}

	p.Locations.Add(other.Locations.ToSlice()...)
	p.CPEs = mergeCPEs(p.CPEs, other.CPEs)

	if p.PURL == "" {
		p.PURL = other.PURL
	}

	return nil
}

View File

@ -3,24 +3,28 @@ package pkg
import ( import (
"testing" "testing"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestFingerprint(t *testing.T) { func TestIDUniqueness(t *testing.T) {
originalPkg := Package{ originalLocation := source.Location{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: []source.Location{
{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "39.0742° N, 21.8243° E", RealPath: "39.0742° N, 21.8243° E",
FileSystemID: "Earth", FileSystemID: "Earth",
}, },
VirtualPath: "/Ancient-Greece", VirtualPath: "/Ancient-Greece",
}, }
}, originalPkg := Package{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
originalLocation,
),
Licenses: []string{ Licenses: []string{
"cc0-1.0", "cc0-1.0",
"MIT", "MIT",
@ -47,7 +51,7 @@ func TestFingerprint(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
transform func(pkg Package) Package transform func(pkg Package) Package
expectIdentical bool expectedIDComparison assert.ComparisonAssertionFunc
}{ }{
{ {
name: "go case (no transform)", name: "go case (no transform)",
@ -55,7 +59,7 @@ func TestFingerprint(t *testing.T) {
// do nothing! // do nothing!
return pkg return pkg
}, },
expectIdentical: true, expectedIDComparison: assert.Equal,
}, },
{ {
name: "same metadata is ignored", name: "same metadata is ignored",
@ -72,7 +76,7 @@ func TestFingerprint(t *testing.T) {
} }
return pkg return pkg
}, },
expectIdentical: true, expectedIDComparison: assert.Equal,
}, },
{ {
name: "licenses order is ignored", name: "licenses order is ignored",
@ -84,7 +88,7 @@ func TestFingerprint(t *testing.T) {
} }
return pkg return pkg
}, },
expectIdentical: true, expectedIDComparison: assert.Equal,
}, },
{ {
name: "name is reflected", name: "name is reflected",
@ -92,7 +96,42 @@ func TestFingerprint(t *testing.T) {
pkg.Name = "new!" pkg.Name = "new!"
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
},
{
name: "location is reflected",
transform: func(pkg Package) Package {
locations := source.NewLocationSet(pkg.Locations.ToSlice()...)
locations.Add(source.NewLocation("/somewhere/new"))
pkg.Locations = locations
return pkg
},
expectedIDComparison: assert.NotEqual,
},
{
name: "same path for different filesystem is NOT reflected",
transform: func(pkg Package) Package {
newLocation := originalLocation
newLocation.FileSystemID = "Mars"
pkg.Locations = source.NewLocationSet(newLocation)
return pkg
},
expectedIDComparison: assert.Equal,
},
{
name: "multiple equivalent paths for different filesystem is NOT reflected",
transform: func(pkg Package) Package {
newLocation := originalLocation
newLocation.FileSystemID = "Mars"
locations := source.NewLocationSet(pkg.Locations.ToSlice()...)
locations.Add(newLocation, originalLocation)
pkg.Locations = locations
return pkg
},
expectedIDComparison: assert.Equal,
}, },
{ {
name: "version is reflected", name: "version is reflected",
@ -100,7 +139,7 @@ func TestFingerprint(t *testing.T) {
pkg.Version = "new!" pkg.Version = "new!"
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "licenses is reflected", name: "licenses is reflected",
@ -108,7 +147,7 @@ func TestFingerprint(t *testing.T) {
pkg.Licenses = []string{"new!"} pkg.Licenses = []string{"new!"}
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "type is reflected", name: "type is reflected",
@ -116,7 +155,7 @@ func TestFingerprint(t *testing.T) {
pkg.Type = RustPkg pkg.Type = RustPkg
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "metadata type is reflected", name: "metadata type is reflected",
@ -124,7 +163,7 @@ func TestFingerprint(t *testing.T) {
pkg.MetadataType = RustCargoPackageMetadataType pkg.MetadataType = RustCargoPackageMetadataType
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "CPEs is ignored", name: "CPEs is ignored",
@ -132,7 +171,7 @@ func TestFingerprint(t *testing.T) {
pkg.CPEs = []CPE{} pkg.CPEs = []CPE{}
return pkg return pkg
}, },
expectIdentical: true, expectedIDComparison: assert.Equal,
}, },
{ {
name: "pURL is ignored", name: "pURL is ignored",
@ -140,7 +179,7 @@ func TestFingerprint(t *testing.T) {
pkg.PURL = "new!" pkg.PURL = "new!"
return pkg return pkg
}, },
expectIdentical: true, expectedIDComparison: assert.Equal,
}, },
{ {
name: "language is reflected", name: "language is reflected",
@ -148,7 +187,7 @@ func TestFingerprint(t *testing.T) {
pkg.Language = Rust pkg.Language = Rust
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "foundBy is reflected", name: "foundBy is reflected",
@ -156,7 +195,7 @@ func TestFingerprint(t *testing.T) {
pkg.FoundBy = "new!" pkg.FoundBy = "new!"
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "metadata mutation is reflected", name: "metadata mutation is reflected",
@ -166,7 +205,7 @@ func TestFingerprint(t *testing.T) {
pkg.Metadata = metadata pkg.Metadata = metadata
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "new metadata is reflected", name: "new metadata is reflected",
@ -176,7 +215,7 @@ func TestFingerprint(t *testing.T) {
} }
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
{ {
name: "nil metadata is reflected", name: "nil metadata is reflected",
@ -184,7 +223,7 @@ func TestFingerprint(t *testing.T) {
pkg.Metadata = nil pkg.Metadata = nil
return pkg return pkg
}, },
expectIdentical: false, expectedIDComparison: assert.NotEqual,
}, },
} }
@ -199,12 +238,207 @@ func TestFingerprint(t *testing.T) {
transformedFingerprint := transformedPkg.ID() transformedFingerprint := transformedPkg.ID()
assert.NotEmpty(t, transformedFingerprint) assert.NotEmpty(t, transformedFingerprint)
if test.expectIdentical { test.expectedIDComparison(t, originalFingerprint, transformedFingerprint)
assert.Equal(t, originalFingerprint, transformedFingerprint) })
} else { }
assert.NotEqual(t, originalFingerprint, transformedFingerprint) }
}
func TestPackage_Merge(t *testing.T) {
originalLocation := source.Location{
Coordinates: source.Coordinates{
RealPath: "39.0742° N, 21.8243° E",
FileSystemID: "Earth",
},
VirtualPath: "/Ancient-Greece",
}
similarLocation := originalLocation
similarLocation.FileSystemID = "Mars"
tests := []struct {
name string
subject Package
other Package
expected *Package
}{
{
name: "merge two packages (different cpes + locations)",
subject: Package{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
originalLocation,
),
Licenses: []string{
"cc0-1.0",
"MIT",
},
Language: "math",
Type: PythonPkg,
CPEs: []CPE{
must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)),
},
PURL: "pkg:pypi/pi@3.14",
MetadataType: PythonPackageMetadataType,
Metadata: PythonPackageMetadata{
Name: "pi",
Version: "3.14",
License: "cc0-1.0",
Author: "Archimedes",
AuthorEmail: "Archimedes@circles.io",
Platform: "universe",
SitePackagesRootPath: "Pi",
},
},
other: Package{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
similarLocation, // NOTE: difference; we have a different layer but the same path
),
Licenses: []string{
"cc0-1.0",
"MIT",
},
Language: "math",
Type: PythonPkg,
CPEs: []CPE{
must(NewCPE(`cpe:2.3:a:DIFFERENT:pi:3.14:*:*:*:*:math:*:*`)), // NOTE: difference
},
PURL: "pkg:pypi/pi@3.14",
MetadataType: PythonPackageMetadataType,
Metadata: PythonPackageMetadata{
Name: "pi",
Version: "3.14",
License: "cc0-1.0",
Author: "Archimedes",
AuthorEmail: "Archimedes@circles.io",
Platform: "universe",
SitePackagesRootPath: "Pi",
},
},
expected: &Package{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
originalLocation,
similarLocation, // NOTE: merge!
),
Licenses: []string{
"cc0-1.0",
"MIT",
},
Language: "math",
Type: PythonPkg,
CPEs: []CPE{
must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)),
must(NewCPE(`cpe:2.3:a:DIFFERENT:pi:3.14:*:*:*:*:math:*:*`)), // NOTE: merge!
},
PURL: "pkg:pypi/pi@3.14",
MetadataType: PythonPackageMetadataType,
Metadata: PythonPackageMetadata{
Name: "pi",
Version: "3.14",
License: "cc0-1.0",
Author: "Archimedes",
AuthorEmail: "Archimedes@circles.io",
Platform: "universe",
SitePackagesRootPath: "Pi",
},
},
},
{
name: "error when there are different IDs",
subject: Package{
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
originalLocation,
),
Licenses: []string{
"cc0-1.0",
"MIT",
},
Language: "math",
Type: PythonPkg,
CPEs: []CPE{
must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)),
},
PURL: "pkg:pypi/pi@3.14",
MetadataType: PythonPackageMetadataType,
Metadata: PythonPackageMetadata{
Name: "pi",
Version: "3.14",
License: "cc0-1.0",
Author: "Archimedes",
AuthorEmail: "Archimedes@circles.io",
Platform: "universe",
SitePackagesRootPath: "Pi",
},
},
other: Package{
Name: "pi-DIFFERENT", // difference
Version: "3.14",
FoundBy: "Archimedes",
Locations: source.NewLocationSet(
originalLocation,
),
Licenses: []string{
"cc0-1.0",
"MIT",
},
Language: "math",
Type: PythonPkg,
CPEs: []CPE{
must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)),
},
PURL: "pkg:pypi/pi@3.14",
MetadataType: PythonPackageMetadataType,
Metadata: PythonPackageMetadata{
Name: "pi",
Version: "3.14",
License: "cc0-1.0",
Author: "Archimedes",
AuthorEmail: "Archimedes@circles.io",
Platform: "universe",
SitePackagesRootPath: "Pi",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.subject.SetID()
tt.other.SetID()
err := tt.subject.merge(tt.other)
if tt.expected == nil {
require.Error(t, err)
return
}
require.NoError(t, err)
tt.expected.SetID()
require.Equal(t, tt.expected.id, tt.subject.id)
if diff := cmp.Diff(*tt.expected, tt.subject,
cmp.AllowUnexported(Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(
x.ToSlice(), y.ToSlice(),
cmp.AllowUnexported(source.Location{}),
cmp.AllowUnexported(file.Reference{}),
)
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
}
}) })
} }
} }

View File

@ -18,10 +18,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
name: "owns-by-real-path", name: "owns-by-real-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{ parent := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/a/path", "/another/path"), source.NewVirtualLocation("/a/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/bee/path"), source.NewVirtualLocation("/b/path", "/bee/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: RpmdbMetadata{
@ -35,10 +35,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
parent.SetID() parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/c/path", "/another/path"),
source.NewVirtualLocation("/d/path", "/another/path"), source.NewVirtualLocation("/d/path", "/another/path"),
}, ),
Type: NpmPkg, Type: NpmPkg,
} }
child.SetID() child.SetID()
@ -61,10 +61,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
name: "owns-by-virtual-path", name: "owns-by-virtual-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{ parent := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/a/path", "/some/other/path"), source.NewVirtualLocation("/a/path", "/some/other/path"),
source.NewVirtualLocation("/b/path", "/bee/path"), source.NewVirtualLocation("/b/path", "/bee/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: RpmdbMetadata{
@ -78,10 +78,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
parent.SetID() parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/c/path", "/another/path"),
source.NewLocation("/d/path"), source.NewLocation("/d/path"),
}, ),
Type: NpmPkg, Type: NpmPkg,
} }
child.SetID() child.SetID()
@ -103,10 +103,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
name: "ignore-empty-path", name: "ignore-empty-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{ parent := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/a/path", "/some/other/path"), source.NewVirtualLocation("/a/path", "/some/other/path"),
source.NewVirtualLocation("/b/path", "/bee/path"), source.NewVirtualLocation("/b/path", "/bee/path"),
}, ),
Type: RpmPkg, Type: RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: RpmdbMetadata{
@ -121,10 +121,10 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
parent.SetID() parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: source.NewLocationSet(
source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/c/path", "/another/path"),
source.NewLocation("/d/path"), source.NewLocation("/d/path"),
}, ),
Type: NpmPkg, Type: NpmPkg,
} }

View File

@ -0,0 +1,86 @@
package source
import (
"sort"
"github.com/mitchellh/hashstructure/v2"
"github.com/scylladb/go-set/strset"
)
// CoordinateSet is a collection of unique Coordinates values.
type CoordinateSet struct {
	// set holds the members, keyed by the Coordinates value itself.
	set map[Coordinates]struct{}
}
// NewCoordinateSet builds a CoordinateSet holding each of the given
// coordinates once. With no arguments the zero-value set is returned.
func NewCoordinateSet(coordinates ...Coordinates) (s CoordinateSet) {
	for idx := range coordinates {
		s.Add(coordinates[idx])
	}
	return s
}
// Add inserts every given coordinate into the set, allocating the backing map
// on first use. Coordinates that are already members are left untouched.
func (s *CoordinateSet) Add(coordinates ...Coordinates) {
	if s.set == nil {
		s.set = map[Coordinates]struct{}{}
	}

	for idx := range coordinates {
		s.set[coordinates[idx]] = struct{}{}
	}
}
// Remove deletes each given coordinate from the set. Coordinates that are not
// members are ignored, and a set without a backing map is left as it is.
func (s CoordinateSet) Remove(coordinates ...Coordinates) {
	if s.set != nil {
		for idx := range coordinates {
			delete(s.set, coordinates[idx])
		}
	}
}
// Contains reports whether the given coordinate is a member of the set.
func (s CoordinateSet) Contains(l Coordinates) bool {
	if s.set != nil {
		_, present := s.set[l]
		return present
	}
	return false
}
// Paths returns the distinct RealPath values of the set's members in ascending
// order. It returns nil when the set has no backing map.
func (s CoordinateSet) Paths() []string {
	if s.set == nil {
		return nil
	}

	// Range over the map directly: going through ToSlice would sort every
	// coordinate only for that order to be thrown away by the path set below.
	paths := strset.New()
	for c := range s.set {
		paths.Add(c.RealPath)
	}

	pathSlice := paths.List()
	sort.Strings(pathSlice)
	return pathSlice
}
// ToSlice returns the members of the set ordered by RealPath, with ties broken
// by FileSystemID. It returns nil when the set has no backing map.
func (s CoordinateSet) ToSlice() []Coordinates {
	if s.set == nil {
		return nil
	}

	members := make([]Coordinates, 0, len(s.set))
	for member := range s.set {
		members = append(members, member)
	}

	sort.SliceStable(members, func(a, b int) bool {
		if members[a].RealPath != members[b].RealPath {
			return members[a].RealPath < members[b].RealPath
		}
		return members[a].FileSystemID < members[b].FileSystemID
	})

	return members
}
// Hash returns the hashstructure (FormatV2) hash of the sorted member slice,
// computed with the ZeroNil and SlicesAsSets options enabled.
func (s CoordinateSet) Hash() (uint64, error) {
	opts := hashstructure.HashOptions{
		ZeroNil:      true,
		SlicesAsSets: true,
	}
	return hashstructure.Hash(s.ToSlice(), hashstructure.FormatV2, &opts)
}

View File

@ -0,0 +1,117 @@
package source
import (
"github.com/anchore/syft/syft/artifact"
"github.com/stretchr/testify/require"
"testing"
"github.com/stretchr/testify/assert"
)
// TestCoordinatesSet checks that NewCoordinateSet collapses repeated
// coordinates and that ToSlice yields the members in sorted order.
func TestCoordinatesSet(t *testing.T) {
	binA := Coordinates{RealPath: "/bin", FileSystemID: "a"}
	binB := Coordinates{RealPath: "/bin", FileSystemID: "b"}

	cases := []struct {
		name     string
		input    []Coordinates
		expected []Coordinates
	}{
		{
			name:     "de-dup same location",
			input:    []Coordinates{binA, binA, binA},
			expected: []Coordinates{binA},
		},
		{
			name:     "dont de-dup different filesystem",
			input:    []Coordinates{binB, binA},
			expected: []Coordinates{binA, binB},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NewCoordinateSet(tc.input...).ToSlice()
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestCoordinateSet_Hash verifies which pairs of coordinate sets yield equal
// or differing IDs when passed through artifact.IDByHash.
func TestCoordinateSet_Hash(t *testing.T) {
	etcA := Coordinates{RealPath: "/etc", FileSystemID: "a"}
	etcB := Coordinates{RealPath: "/etc", FileSystemID: "b"}
	binA := Coordinates{RealPath: "/bin", FileSystemID: "a"}
	binB := Coordinates{RealPath: "/bin", FileSystemID: "b"}

	cases := []struct {
		name string
		setA CoordinateSet
		setB CoordinateSet
		want assert.ComparisonAssertionFunc
	}{
		{
			name: "empty sets have the same hash",
			setA: NewCoordinateSet(),
			setB: NewCoordinateSet(),
			want: assert.Equal,
		},
		{
			name: "sets with same elements have the same hash",
			setA: NewCoordinateSet(binA, etcA),
			setB: NewCoordinateSet(etcA, binA),
			want: assert.Equal,
		},
		{
			name: "sets with different elements have different hashes",
			setA: NewCoordinateSet(binA, etcA),
			setB: NewCoordinateSet(binA),
			want: assert.NotEqual,
		},
		{
			name: "sets with same paths but different FS IDs have different hashes",
			setA: NewCoordinateSet(etcA, binA),
			setB: NewCoordinateSet(etcB, binB),
			want: assert.NotEqual,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			idA, errA := artifact.IDByHash(tc.setA)
			require.NoError(t, errA)

			idB, errB := artifact.IDByHash(tc.setB)
			require.NoError(t, errB)

			tc.want(t, idA, idB)
		})
	}
}

View File

@ -2,7 +2,6 @@ package source
import ( import (
"fmt" "fmt"
"sort"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -14,18 +13,6 @@ type Coordinates struct {
FileSystemID string `json:"layerID,omitempty" cyclonedx:"layerID"` // An ID representing the filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank. FileSystemID string `json:"layerID,omitempty" cyclonedx:"layerID"` // An ID representing the filesystem. For container images, this is a layer digest. For directories or a root filesystem, this is blank.
} }
// CoordinateSet represents a set of unique Coordinates values.
type CoordinateSet map[Coordinates]struct{}
// NewCoordinateSet returns a CoordinateSet holding each of the given values once.
func NewCoordinateSet(start ...Coordinates) CoordinateSet {
	set := make(CoordinateSet)
	for idx := range start {
		set.Add(start[idx])
	}
	return set
}
func (c Coordinates) ID() artifact.ID { func (c Coordinates) ID() artifact.ID {
f, err := artifact.IDByHash(c) f, err := artifact.IDByHash(c)
if err != nil { if err != nil {
@ -45,37 +32,3 @@ func (c Coordinates) String() string {
} }
return fmt.Sprintf("Location<%s>", str) return fmt.Sprintf("Location<%s>", str)
} }
// Add inserts the given Coordinates value into the set.
func (s CoordinateSet) Add(i Coordinates) {
	s[i] = struct{}{}
}
// Remove deletes the given Coordinates value from the set.
func (s CoordinateSet) Remove(i Coordinates) {
	delete(s, i)
}
// Contains reports whether the given Coordinates value is a member of the set.
func (s CoordinateSet) Contains(i Coordinates) bool {
	_, present := s[i]
	return present
}
// ToSlice returns the Coordinates contained within the set as a slice sorted
// by RealPath, with ties broken by FileSystemID.
func (s CoordinateSet) ToSlice() []Coordinates {
	members := make([]Coordinates, 0, len(s))
	for member := range s {
		members = append(members, member)
	}

	sort.SliceStable(members, func(a, b int) bool {
		if members[a].RealPath != members[b].RealPath {
			return members[a].RealPath < members[b].RealPath
		}
		return members[a].FileSystemID < members[b].FileSystemID
	})

	return members
}

View File

@ -1,51 +0,0 @@
package source
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestCoordinateSet checks that NewCoordinateSet collapses repeated
// coordinates and that ToSlice yields the members in sorted order.
func TestCoordinateSet(t *testing.T) {
	binA := Coordinates{RealPath: "/bin", FileSystemID: "a"}
	binB := Coordinates{RealPath: "/bin", FileSystemID: "b"}

	cases := []struct {
		name     string
		input    []Coordinates
		expected []Coordinates
	}{
		{
			name:     "de-dup same location",
			input:    []Coordinates{binA, binA, binA},
			expected: []Coordinates{binA},
		},
		{
			name:     "dont de-dup different filesystem",
			input:    []Coordinates{binB, binA},
			expected: []Coordinates{binA, binB},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NewCoordinateSet(tc.input...).ToSlice()
			assert.Equal(t, tc.expected, got)
		})
	}
}

View File

@ -109,3 +109,9 @@ func (l Location) String() string {
} }
return fmt.Sprintf("Location<%s>", str) return fmt.Sprintf("Location<%s>", str)
} }
// Equals reports whether both locations share the same RealPath, VirtualPath
// and FileSystemID.
func (l Location) Equals(other Location) bool {
	if l.RealPath != other.RealPath {
		return false
	}
	if l.VirtualPath != other.VirtualPath {
		return false
	}
	return l.FileSystemID == other.FileSystemID
}

View File

@ -0,0 +1,78 @@
package source
import (
"sort"
"github.com/mitchellh/hashstructure/v2"
)
// LocationSet is a collection of unique Location values.
type LocationSet struct {
	// set holds the members, keyed by the Location value itself.
	set map[Location]struct{}
}
// NewLocationSet builds a LocationSet holding each of the given locations
// once. With no arguments the zero-value set is returned.
func NewLocationSet(locations ...Location) (s LocationSet) {
	for idx := range locations {
		s.Add(locations[idx])
	}
	return s
}
// Add inserts every given location into the set, allocating the backing map
// on first use. Locations that are already members are left untouched.
func (s *LocationSet) Add(locations ...Location) {
	if s.set == nil {
		s.set = map[Location]struct{}{}
	}

	for idx := range locations {
		s.set[locations[idx]] = struct{}{}
	}
}
// Remove deletes each given location from the set. Locations that are not
// members are ignored, and a set without a backing map is left as it is.
func (s LocationSet) Remove(locations ...Location) {
	if s.set != nil {
		for idx := range locations {
			delete(s.set, locations[idx])
		}
	}
}
// Contains reports whether the given location is a member of the set.
func (s LocationSet) Contains(l Location) bool {
	if s.set != nil {
		_, present := s.set[l]
		return present
	}
	return false
}
// ToSlice returns the members of the set sorted with the Locations ordering.
// It returns nil when the set has no backing map.
func (s LocationSet) ToSlice() []Location {
	if s.set == nil {
		return nil
	}

	members := make([]Location, 0, len(s.set))
	for member := range s.set {
		members = append(members, member)
	}

	sort.Sort(Locations(members))
	return members
}
// CoordinateSet returns the set of Coordinates taken from every location in
// this set. Locations that differ only in fields outside their Coordinates
// collapse into a single entry.
//
// The receiver is a value, matching the other read-only methods of
// LocationSet, so the method can also be called on non-addressable values.
func (s LocationSet) CoordinateSet() CoordinateSet {
	// Ranging over a nil map performs no iterations, so an unpopulated
	// LocationSet yields the same empty CoordinateSet without a special case.
	set := NewCoordinateSet()
	for l := range s.set {
		set.Add(l.Coordinates)
	}
	return set
}
// Hash returns the hashstructure (FormatV2) hash of the set's real paths,
// computed with the ZeroNil and SlicesAsSets options enabled.
func (s LocationSet) Hash() (uint64, error) {
	// access paths and filesystem IDs are not considered when hashing a location set, only the real paths
	realPaths := s.CoordinateSet().Paths()

	opts := hashstructure.HashOptions{
		ZeroNil:      true,
		SlicesAsSets: true,
	}
	return hashstructure.Hash(realPaths, hashstructure.FormatV2, &opts)
}

View File

@ -0,0 +1,178 @@
package source
import (
"github.com/anchore/syft/syft/artifact"
"github.com/stretchr/testify/require"
"testing"
"github.com/stretchr/testify/assert"
)
// TestLocationSet checks that NewLocationSet de-duplicates identical locations
// only, and that ToSlice yields the members in sorted order.
func TestLocationSet(t *testing.T) {
	etcHostsLinkVar := Location{
		Coordinates: Coordinates{RealPath: "/etc/hosts", FileSystemID: "a"},
		VirtualPath: "/var/etc/hosts",
	}
	etcHostsLinkHome := Location{
		Coordinates: Coordinates{RealPath: "/etc/hosts", FileSystemID: "a"},
		VirtualPath: "/home/wagoodman/hosts",
	}
	binA := Location{
		Coordinates: Coordinates{RealPath: "/bin", FileSystemID: "a"},
		VirtualPath: "/usr/bin",
	}
	binB := Location{
		Coordinates: Coordinates{RealPath: "/bin", FileSystemID: "b"},
		VirtualPath: "/usr/bin",
	}

	cases := []struct {
		name     string
		input    []Location
		expected []Location
	}{
		{
			name:     "de-dup same location",
			input:    []Location{binA, binA, binA},
			expected: []Location{binA},
		},
		{
			name:     "dont de-dup different filesystem",
			input:    []Location{binB, binA},
			expected: []Location{binA, binB},
		},
		{
			name:     "dont de-dup different virtual paths",
			input:    []Location{etcHostsLinkVar, etcHostsLinkHome},
			expected: []Location{etcHostsLinkHome, etcHostsLinkVar},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := NewLocationSet(tc.input...).ToSlice()
			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestLocationSet_Hash verifies which pairs of location sets yield equal or
// differing IDs when passed through artifact.IDByHash.
func TestLocationSet_Hash(t *testing.T) {
	etcAlink := Location{
		Coordinates: Coordinates{RealPath: "/etc/hosts", FileSystemID: "a"},
		VirtualPath: "/var/etc/hosts",
	}
	etcA := Location{
		Coordinates: Coordinates{RealPath: "/etc/hosts", FileSystemID: "a"},
	}
	etcB := Location{
		Coordinates: Coordinates{RealPath: "/etc/hosts", FileSystemID: "b"},
	}
	binA := Location{
		Coordinates: Coordinates{RealPath: "/bin", FileSystemID: "a"},
		VirtualPath: "/usr/bin",
	}
	binB := Location{
		Coordinates: Coordinates{RealPath: "/bin", FileSystemID: "b"},
		VirtualPath: "/usr/bin",
	}

	cases := []struct {
		name string
		setA LocationSet
		setB LocationSet
		want assert.ComparisonAssertionFunc
	}{
		{
			name: "empty sets have the same hash",
			setA: NewLocationSet(),
			setB: NewLocationSet(),
			want: assert.Equal,
		},
		{
			name: "sets with same elements accessed through different paths have the same hash",
			setA: NewLocationSet(binA, etcA),
			setB: NewLocationSet(etcAlink, binA),
			want: assert.Equal,
		},
		{
			name: "sets with same elements have the same hash",
			setA: NewLocationSet(binA, etcA),
			setB: NewLocationSet(etcA, binA),
			want: assert.Equal,
		},
		{
			name: "sets with different element counts have different hashes",
			setA: NewLocationSet(binA, etcA),
			setB: NewLocationSet(binA),
			want: assert.NotEqual,
		},
		{
			name: "sets with same path but different FS IDs have the same hash",
			setA: NewLocationSet(binA),
			setB: NewLocationSet(binB),
			want: assert.Equal,
		},
		{
			name: "sets with same paths but different FS IDs have the same hash",
			setA: NewLocationSet(etcA, binA),
			setB: NewLocationSet(binB, etcB),
			want: assert.Equal,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			idA, errA := artifact.IDByHash(tc.setA)
			require.NoError(t, errA)

			idB, errB := artifact.IDByHash(tc.setB)
			require.NoError(t, errB)

			tc.want(t, idA, idB)
		})
	}
}

21
syft/source/locations.go Normal file
View File

@ -0,0 +1,21 @@
package source
// Locations is a slice of Location values; its Len, Less and Swap methods
// allow it to be sorted with the sort package.
type Locations []Location
// Len returns the number of locations in the slice.
func (l Locations) Len() int {
	return len(l)
}
// Less reports whether element i sorts before element j. Elements are ordered
// by RealPath, then by VirtualPath, then by FileSystemID.
func (l Locations) Less(i, j int) bool {
	a, b := &l[i], &l[j]
	switch {
	case a.RealPath != b.RealPath:
		return a.RealPath < b.RealPath
	case a.VirtualPath != b.VirtualPath:
		return a.VirtualPath < b.VirtualPath
	default:
		return a.FileSystemID < b.FileSystemID
	}
}
// Swap exchanges the elements at indexes i and j.
func (l Locations) Swap(i, j int) {
	l[i], l[j] = l[j], l[i]
}

View File

@ -8,6 +8,7 @@ import (
) )
func TestPackagesCmdFlags(t *testing.T) { func TestPackagesCmdFlags(t *testing.T) {
hiddenPackagesImage := "docker-archive:" + getFixtureImage(t, "image-hidden-packages")
coverageImage := "docker-archive:" + getFixtureImage(t, "image-pkg-coverage") coverageImage := "docker-archive:" + getFixtureImage(t, "image-pkg-coverage")
//badBinariesImage := "docker-archive:" + getFixtureImage(t, "image-bad-binaries") //badBinariesImage := "docker-archive:" + getFixtureImage(t, "image-bad-binaries")
tmp := t.TempDir() + "/" tmp := t.TempDir() + "/"
@ -100,21 +101,34 @@ func TestPackagesCmdFlags(t *testing.T) {
}, },
}, },
{ {
name: "all-layers-scope-flag", name: "squashed-scope-flag-hidden-packages",
args: []string{"packages", "-o", "json", "-s", "all-layers", coverageImage}, args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage},
assertions: []traitAssertion{ assertions: []traitAssertion{
assertPackageCount(22), assertPackageCount(162),
assertNotInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
},
},
{
name: "all-layers-scope-flag",
args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage},
assertions: []traitAssertion{
assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode, assertSuccessfulReturnCode,
}, },
}, },
{ {
name: "all-layers-scope-flag-by-env", name: "all-layers-scope-flag-by-env",
args: []string{"packages", "-o", "json", coverageImage}, args: []string{"packages", "-o", "json", hiddenPackagesImage},
env: map[string]string{ env: map[string]string{
"SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers",
}, },
assertions: []traitAssertion{ assertions: []traitAssertion{
assertPackageCount(22), assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode, assertSuccessfulReturnCode,
}, },
}, },

View File

@ -0,0 +1,4 @@
FROM centos:7.9.2009
# vsftpd is installed in one layer and removed again in the next, so it is absent
# from the final filesystem; the all-layers scope should still pick up on vsftpd.
RUN yum install -y vsftpd
RUN yum remove -y vsftpd

View File

@ -54,7 +54,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
} }
func TestPkgCoverageImage(t *testing.T) { func TestPkgCoverageImage(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-pkg-coverage") sbom, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope)
observedLanguages := internal.NewStringSet() observedLanguages := internal.NewStringSet()
definedLanguages := internal.NewStringSet() definedLanguages := internal.NewStringSet()

View File

@ -1,6 +1,7 @@
package integration package integration
import ( import (
"github.com/anchore/syft/syft/source"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -9,7 +10,7 @@ import (
) )
func TestDistroImage(t *testing.T) { func TestDistroImage(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-distro-id") sbom, _ := catalogFixtureImage(t, "image-distro-id", source.SquashedScope)
expected := &linux.Release{ expected := &linux.Release{
PrettyName: "BusyBox v1.31.1", PrettyName: "BusyBox v1.31.1",

View File

@ -2,12 +2,14 @@ package integration
import ( import (
"bytes" "bytes"
"regexp" "fmt"
"testing"
"github.com/anchore/syft/internal/formats/cyclonedxjson" "github.com/anchore/syft/internal/formats/cyclonedxjson"
"github.com/anchore/syft/internal/formats/cyclonedxxml" "github.com/anchore/syft/internal/formats/cyclonedxxml"
"github.com/anchore/syft/internal/formats/syftjson" "github.com/anchore/syft/internal/formats/syftjson"
"github.com/anchore/syft/syft/source"
"regexp"
"testing"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -51,12 +53,12 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
}, },
}, },
} }
for _, test := range tests {
t.Run(string(test.formatOption), func(t *testing.T) {
for _, test := range tests {
// use second image for relationships // use second image for relationships
for _, image := range []string{"image-pkg-coverage", "image-owning-package"} { for _, image := range []string{"image-pkg-coverage", "image-owning-package"} {
originalSBOM, _ := catalogFixtureImage(t, image) t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) {
originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope)
format := syft.FormatByID(test.formatOption) format := syft.FormatByID(test.formatOption)
require.NotNil(t, format) require.NotNil(t, format)
@ -87,7 +89,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
} }
} }
}
}) })
} }
}
} }

View File

@ -14,7 +14,7 @@ func TestNpmPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations.ToSlice() {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation) t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation)
} }
@ -35,7 +35,7 @@ func TestYarnPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations.ToSlice() {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation) t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation)
} }

View File

@ -0,0 +1,84 @@
package integration
import (
"fmt"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
)
// TestPackageDeduplication catalogs the "image-vertical-package-dups" fixture
// under each scope and checks the total package count, the number of package
// instances per name, and the number of merged locations per name-version.
func TestPackageDeduplication(t *testing.T) {
	cases := []struct {
		scope         source.Scope
		packageCount  int
		instanceCount map[string]int
		locationCount map[string]int
	}{
		{
			scope:        source.AllLayersScope,
			packageCount: 172, // without deduplication this would be 618
			instanceCount: map[string]int{
				"basesystem": 1,
				"wget":       1,
				"curl":       2, // upgraded in the image
				"vsftpd":     1,
				"httpd":      1,
			},
			locationCount: map[string]int{
				"basesystem-10.0-7.el7.centos": 4,
				"curl-7.29.0-59.el7":           1, // from base image
				"curl-7.29.0-59.el7_9.1":       3, // upgrade
				"wget-1.14-18.el7_6.1":         3,
				"vsftpd-3.0.2-29.el7_9":        2,
				"httpd-2.4.6-97.el7.centos.5":  1,
			},
		},
		{
			scope:        source.SquashedScope,
			packageCount: 170,
			instanceCount: map[string]int{
				"basesystem": 1,
				"wget":       1,
				"curl":       1, // upgraded, but the most recent
				"vsftpd":     1,
				"httpd":      1,
			},
			locationCount: map[string]int{
				"basesystem-10.0-7.el7.centos": 1,
				"curl-7.29.0-59.el7_9.1":       1, // upgrade
				"wget-1.14-18.el7_6.1":         1,
				"vsftpd-3.0.2-29.el7_9":        1,
				"httpd-2.4.6-97.el7.centos.5":  1,
			},
		},
	}

	for _, tc := range cases {
		t.Run(string(tc.scope), func(t *testing.T) {
			sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tc.scope)

			assert.Equal(t, tc.packageCount, sbom.Artifacts.PackageCatalog.PackageCount())

			for name, wantInstances := range tc.instanceCount {
				matches := sbom.Artifacts.PackageCatalog.PackagesByName(name)

				// an unexpected number of packages for this name means something
				// is wrong (or this is the wrong fixture)
				require.Len(t, matches, wantInstances)

				for _, p := range matches {
					key := fmt.Sprintf("%s-%s", name, p.Version)
					wantLocations, known := tc.locationCount[key]
					if !known {
						t.Fatalf("missing name-version: %s", key)
					}

					// we should see merged locations (assumption, there was 1 location for each package)
					assert.Len(t, p.Locations.ToSlice(), wantLocations)

					// every merged location should share a single real path
					assert.Len(t, p.Locations.CoordinateSet().Paths(), 1)
				}
			}
		})
	}
}

View File

@ -3,6 +3,7 @@ package integration
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"github.com/anchore/syft/syft/source"
"testing" "testing"
"github.com/anchore/syft/internal/formats/syftjson" "github.com/anchore/syft/internal/formats/syftjson"
@ -22,7 +23,7 @@ func TestPackageOwnershipRelationships(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) { t.Run(test.fixture, func(t *testing.T) {
sbom, _ := catalogFixtureImage(t, test.fixture) sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope)
output := bytes.NewBufferString("") output := bytes.NewBufferString("")
err := syftjson.Format().Encode(output, sbom) err := syftjson.Format().Encode(output, sbom)

View File

@ -1,6 +1,7 @@
package integration package integration
import ( import (
"github.com/anchore/syft/syft/source"
"testing" "testing"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -9,7 +10,7 @@ import (
func TestRegression212ApkBufferSize(t *testing.T) { func TestRegression212ApkBufferSize(t *testing.T) {
// This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could // This is a regression test for issue #212 (https://github.com/anchore/syft/issues/212) in which the apk db could
// not be processed due to a scanner buffer that was too small // not be processed due to a scanner buffer that was too small
sbom, _ := catalogFixtureImage(t, "image-large-apk-data") sbom, _ := catalogFixtureImage(t, "image-large-apk-data", source.SquashedScope)
expectedPkgs := 58 expectedPkgs := 58
actualPkgs := 0 actualPkgs := 0

View File

@ -1,6 +1,7 @@
package integration package integration
import ( import (
"github.com/anchore/syft/syft/source"
"strings" "strings"
"testing" "testing"
@ -15,12 +16,12 @@ func TestRegressionGoArchDiscovery(t *testing.T) {
) )
// This is a regression test to make sure the way we detect go binary packages // This is a regression test to make sure the way we detect go binary packages
// stays consistent and reproducible as the tool chain evolves // stays consistent and reproducible as the tool chain evolves
sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage") sbom, _ := catalogFixtureImage(t, "image-go-bin-arch-coverage", source.SquashedScope)
var actualELF, actualWIN, actualMACOS int var actualELF, actualWIN, actualMACOS int
for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) { for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) {
for _, l := range p.Locations { for _, l := range p.Locations.ToSlice() {
switch { switch {
case strings.Contains(l.RealPath, "elf"): case strings.Contains(l.RealPath, "elf"):
actualELF++ actualELF++

View File

@ -1,9 +1,10 @@
package integration package integration
import ( import (
"github.com/anchore/syft/syft/source"
"testing" "testing"
) )
func TestRegressionJavaNoMainPackage(t *testing.T) { // Regression: https://github.com/anchore/syft/issues/252 func TestRegressionJavaNoMainPackage(t *testing.T) { // Regression: https://github.com/anchore/syft/issues/252
catalogFixtureImage(t, "image-java-no-main-package") catalogFixtureImage(t, "image-java-no-main-package", source.SquashedScope)
} }

View File

@ -0,0 +1,6 @@
FROM centos:7.9.2009
# Each RUN below modifies the RPM DB again. Modifying the RPM DB multiple times results in
# duplicate packages when using the all-layers scope (if there was no de-dup logic).
# curl is tricky: it already exists in the base image and is being upgraded here.
RUN yum install -y wget curl
RUN yum install -y vsftpd
RUN yum install -y httpd

View File

@ -13,7 +13,7 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *source.Source) { func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope) (sbom.SBOM, *source.Source) {
imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName) imagetest.GetFixtureImage(t, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName)
userInput := "docker-archive:" + tarPath userInput := "docker-archive:" + tarPath
@ -25,7 +25,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou
// TODO: this would be better with functional options (after/during API refactor) // TODO: this would be better with functional options (after/during API refactor)
c := cataloger.DefaultConfig() c := cataloger.DefaultConfig()
c.Search.Scope = source.SquashedScope c.Search.Scope = scope
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c)
if err != nil { if err != nil {
t.Fatalf("failed to catalog image: %+v", err) t.Fatalf("failed to catalog image: %+v", err)