From 8a0fa5d3ad3f0d6c06e8c9195437beb1122a085e Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 9 Nov 2021 10:49:37 -0500 Subject: [PATCH] remove catalog ID assignment Signed-off-by: Alex Goodman --- go.mod | 1 + go.sum | 2 + internal/formats/common/testutils/utils.go | 4 - internal/formats/syftjson/to_format_model.go | 2 +- internal/formats/syftjson/to_syft_model.go | 2 - syft/pkg/catalog.go | 85 ++++-------- syft/pkg/catalog_test.go | 124 ++++++------------ syft/pkg/ownership_by_files_relationship.go | 14 +- .../ownership_by_files_relationship_test.go | 90 ++++++------- syft/pkg/package.go | 13 +- syft/pkg/package_test.go | 1 - 11 files changed, 128 insertions(+), 210 deletions(-) diff --git a/go.mod b/go.mod index fba115bf1..747686789 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/gookit/color v1.2.7 github.com/hashicorp/go-multierror v1.1.0 github.com/hashicorp/go-version v1.2.0 + github.com/jinzhu/copier v0.3.2 github.com/mitchellh/go-homedir v1.1.0 github.com/mitchellh/hashstructure v1.1.0 github.com/mitchellh/mapstructure v1.3.1 diff --git a/go.sum b/go.sum index d2a6395ed..9a21d79be 100644 --- a/go.sum +++ b/go.sum @@ -454,6 +454,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jarcoal/httpmock v1.0.5/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik= github.com/jingyugao/rowserrcheck v0.0.0-20191204022205-72ab7603b68a/go.mod h1:xRskid8CManxVta/ALEhJha/pweKBaVG6fWgc0yH25s= +github.com/jinzhu/copier v0.3.2 h1:QdBOCbaouLDYaIPFfi1bKv5F5tPpeTwXe4sD0jqtz5w= +github.com/jinzhu/copier v0.3.2/go.mod h1:24xnZezI2Yqac9J61UC6/dG/k76ttpq0DdJI3QmUvro= github.com/jirfag/go-printf-func-name v0.0.0-20191110105641-45db9963cdd3/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0= github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= diff --git a/internal/formats/common/testutils/utils.go b/internal/formats/common/testutils/utils.go index 69b3243ac..d48f9f9ba 100644 --- a/internal/formats/common/testutils/utils.go +++ b/internal/formats/common/testutils/utils.go @@ -133,7 +133,6 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { // populate catalog with test data catalog.Add(pkg.Package{ - ID: "package-1-id", Name: "package-1", Version: "1.0.1", Locations: []source.Location{ @@ -154,7 +153,6 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { }, }) catalog.Add(pkg.Package{ - ID: "package-2-id", Name: "package-2", Version: "2.0.1", Locations: []source.Location{ @@ -197,7 +195,6 @@ func newDirectoryCatalog() *pkg.Catalog { // populate catalog with test data catalog.Add(pkg.Package{ - ID: "package-1-id", Name: "package-1", Version: "1.0.1", Type: pkg.PythonPkg, @@ -223,7 +220,6 @@ func newDirectoryCatalog() *pkg.Catalog { }, }) catalog.Add(pkg.Package{ - ID: "package-2-id", Name: "package-2", Version: "2.0.1", Type: pkg.DebPkg, diff --git a/internal/formats/syftjson/to_format_model.go b/internal/formats/syftjson/to_format_model.go index 7c148c8ee..5140891b6 100644 --- a/internal/formats/syftjson/to_format_model.go +++ b/internal/formats/syftjson/to_format_model.go @@ -71,7 +71,7 @@ func toPackageModel(p *pkg.Package) model.Package { return model.Package{ PackageBasicData: model.PackageBasicData{ - ID: string(p.ID), + ID: string(p.Identity()), Name: p.Name, Version: p.Version, Type: p.Type, diff --git a/internal/formats/syftjson/to_syft_model.go b/internal/formats/syftjson/to_syft_model.go index f9f75d10f..b49789cac 100644 --- a/internal/formats/syftjson/to_syft_model.go +++ b/internal/formats/syftjson/to_syft_model.go @@ -3,7 +3,6 @@ package syftjson import ( "github.com/anchore/syft/internal/formats/syftjson/model" "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/distro" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" @@ -62,7 +61,6 @@ func toSyftPackage(p model.Package) pkg.Package { } return pkg.Package{ - ID: artifact.ID(p.ID), Name: p.Name, Version: p.Version, FoundBy: p.FoundBy, diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index d3fbae63a..d59a0c4f8 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/anchore/syft/syft/artifact" + "github.com/jinzhu/copier" "github.com/anchore/syft/internal" @@ -13,7 +14,7 @@ import ( // Catalog represents a collection of Packages. type Catalog struct { - byID map[artifact.ID]*Package + byID map[artifact.ID]Package idsByType map[Type][]artifact.ID idsByPath map[string][]artifact.ID // note: this is real path or virtual path lock sync.RWMutex @@ -22,7 +23,7 @@ type Catalog struct { // NewCatalog returns a new empty Catalog func NewCatalog(pkgs ...Package) *Catalog { catalog := Catalog{ - byID: make(map[artifact.ID]*Package), + byID: make(map[artifact.ID]Package), idsByType: make(map[Type][]artifact.ID), idsByPath: make(map[string][]artifact.ID), } @@ -45,16 +46,21 @@ func (c *Catalog) Package(id artifact.ID) *Package { if !exists { return nil } - return v + var p Package + if err := copier.Copy(&p, &v); err != nil { + log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err) + return nil + } + return &p } // PackagesByPath returns all packages that were discovered from the given path. -func (c *Catalog) PackagesByPath(path string) []*Package { +func (c *Catalog) PackagesByPath(path string) []Package { return c.Packages(c.idsByPath[path]) } // Packages returns all packages for the given ID. -func (c *Catalog) Packages(ids []artifact.ID) (result []*Package) { +func (c *Catalog) Packages(ids []artifact.ID) (result []Package) { for _, i := range ids { p, exists := c.byID[i] if exists { @@ -69,68 +75,32 @@ func (c *Catalog) Add(p Package) { c.lock.Lock() defer c.lock.Unlock() - if p.ID == "" { - fingerprint, err := p.Fingerprint() - if err != nil { - log.Warnf("failed to add package to catalog: %w", err) - return - } - - p.ID = artifact.ID(fingerprint) - } + // note: since we are capturing the ID, we cannot modify the package being added from this point forward + id := p.Identity() // store by package ID - c.byID[p.ID] = &p + c.byID[id] = p // store by package type - c.idsByType[p.Type] = append(c.idsByType[p.Type], p.ID) + c.idsByType[p.Type] = append(c.idsByType[p.Type], id) // store by file location paths observedPaths := internal.NewStringSet() for _, l := range p.Locations { if l.RealPath != "" && !observedPaths.Contains(l.RealPath) { - c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], p.ID) + c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], id) observedPaths.Add(l.RealPath) } if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) { - c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], p.ID) + c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], id) observedPaths.Add(l.VirtualPath) } } } -func (c *Catalog) Remove(id artifact.ID) { - c.lock.Lock() - defer c.lock.Unlock() - - _, exists := c.byID[id] - if !exists { - log.Errorf("package ID does not exist in the catalog : id=%+v", id) - return - } - - // Remove all index references to this package ID - for t, ids := range c.idsByType { - c.idsByType[t] = removeID(id, ids) - if len(c.idsByType[t]) == 0 { - delete(c.idsByType, t) - } - } - - for p, ids := range c.idsByPath { - c.idsByPath[p] = removeID(id, ids) - if len(c.idsByPath[p]) == 0 { - delete(c.idsByPath, p) - } - } - - // Remove package - delete(c.byID, id) -} - // Enumerate all packages for the given type(s), enumerating all packages if no type is specified. -func (c *Catalog) Enumerate(types ...Type) <-chan *Package { - channel := make(chan *Package) +func (c *Catalog) Enumerate(types ...Type) <-chan Package { + channel := make(chan Package) go func() { defer close(channel) for ty, ids := range c.idsByType { @@ -148,7 +118,10 @@ func (c *Catalog) Enumerate(types ...Type) <-chan *Package { } } for _, id := range ids { - channel <- c.Package(id) + p := c.Package(id) + if p != nil { + channel <- *p + } } } }() @@ -157,8 +130,7 @@ func (c *Catalog) Enumerate(types ...Type) <-chan *Package { // Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type // is specified. -func (c *Catalog) Sorted(types ...Type) []*Package { - pkgs := make([]*Package, 0) +func (c *Catalog) Sorted(types ...Type) (pkgs []Package) { for p := range c.Enumerate(types...) { pkgs = append(pkgs, p) } @@ -178,12 +150,3 @@ func (c *Catalog) Sorted(types ...Type) []*Package { return pkgs } - -func removeID(id artifact.ID, target []artifact.ID) (result []artifact.ID) { - for _, value := range target { - if value != id { - result = append(result, value) - } - } - return result -} diff --git a/syft/pkg/catalog_test.go b/syft/pkg/catalog_test.go index 52c671bbf..cd88b4a49 100644 --- a/syft/pkg/catalog_test.go +++ b/syft/pkg/catalog_test.go @@ -3,8 +3,6 @@ package pkg import ( "testing" - "github.com/anchore/syft/syft/artifact" - "github.com/scylladb/go-set/strset" "github.com/anchore/syft/syft/source" @@ -12,7 +10,6 @@ import ( var catalogAddAndRemoveTestPkgs = []Package{ { - ID: "my-id", Locations: []source.Location{ { RealPath: "/a/path", @@ -26,7 +23,6 @@ var catalogAddAndRemoveTestPkgs = []Package{ Type: RpmPkg, }, { - ID: "my-other-id", Locations: []source.Location{ { RealPath: "/c/path", @@ -47,6 +43,11 @@ type expectedIndexes struct { } func TestCatalogAddPopulatesIndex(t *testing.T) { + + fixtureID := func(i int) string { + return string(catalogAddAndRemoveTestPkgs[i].Identity()) + } + tests := []struct { name string pkgs []Package @@ -57,16 +58,16 @@ func TestCatalogAddPopulatesIndex(t *testing.T) { pkgs: catalogAddAndRemoveTestPkgs, expectedIndexes: expectedIndexes{ byType: map[Type]*strset.Set{ - RpmPkg: strset.New("my-id"), - NpmPkg: strset.New("my-other-id"), + RpmPkg: strset.New(fixtureID(0)), + NpmPkg: strset.New(fixtureID(1)), }, byPath: map[string]*strset.Set{ - "/another/path": strset.New("my-id", "my-other-id"), - "/a/path": strset.New("my-id"), - "/b/path": strset.New("my-id"), - "/bee/path": strset.New("my-id"), - "/c/path": strset.New("my-other-id"), - "/d/path": strset.New("my-other-id"), + "/another/path": strset.New(fixtureID(0), fixtureID(1)), + "/a/path": strset.New(fixtureID(0)), + "/b/path": strset.New(fixtureID(0)), + "/bee/path": strset.New(fixtureID(0)), + "/c/path": strset.New(fixtureID(1)), + "/d/path": strset.New(fixtureID(1)), }, }, }, @@ -82,50 +83,6 @@ func TestCatalogAddPopulatesIndex(t *testing.T) { } } -func TestCatalogRemove(t *testing.T) { - tests := []struct { - name string - pkgs []Package - removeId artifact.ID - expectedIndexes expectedIndexes - }{ - { - name: "vanilla-add", - removeId: "my-other-id", - pkgs: catalogAddAndRemoveTestPkgs, - expectedIndexes: expectedIndexes{ - byType: map[Type]*strset.Set{ - RpmPkg: strset.New("my-id"), - }, - byPath: map[string]*strset.Set{ - "/another/path": strset.New("my-id"), - "/a/path": strset.New("my-id"), - "/b/path": strset.New("my-id"), - "/bee/path": strset.New("my-id"), - }, - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - c := NewCatalog(test.pkgs...) - c.Remove(test.removeId) - - assertIndexes(t, c, test.expectedIndexes) - - if c.Package(test.removeId) != nil { - t.Errorf("expected package to be removed, but was found!") - } - - if c.PackageCount() != len(test.pkgs)-1 { - t.Errorf("expected count to be affected but was not") - } - - }) - } -} - func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { // assert path index if len(c.idsByPath) != len(expectedIndexes.byPath) { @@ -134,7 +91,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { for path, expectedIds := range expectedIndexes.byPath { actualIds := strset.New() for _, p := range c.PackagesByPath(path) { - actualIds.Add(string(p.ID)) + actualIds.Add(string(p.Identity())) } if !expectedIds.IsEqual(actualIds) { @@ -149,7 +106,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { for ty, expectedIds := range expectedIndexes.byType { actualIds := strset.New() for p := range c.Enumerate(ty) { - actualIds.Add(string(p.ID)) + actualIds.Add(string(p.Identity())) } if !expectedIds.IsEqual(actualIds) { @@ -159,39 +116,42 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { } func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) { + p1 := Package{ + Locations: []source.Location{ + { + RealPath: "/b/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/b/path", + VirtualPath: "/b/path", + }, + }, + Type: RpmPkg, + Name: "Package-1", + } + + p2 := Package{ + Locations: []source.Location{ + { + RealPath: "/b/path", + VirtualPath: "/b/path", + }, + }, + Type: RpmPkg, + Name: "Package-2", + } tests := []struct { name string pkg Package }{ { name: "multiple locations with shared path", - pkg: Package{ - ID: "my-id", - Locations: []source.Location{ - { - RealPath: "/b/path", - VirtualPath: "/another/path", - }, - { - RealPath: "/b/path", - VirtualPath: "/b/path", - }, - }, - Type: RpmPkg, - }, + pkg: p1, }, { name: "one location with shared path", - pkg: Package{ - ID: "my-id", - Locations: []source.Location{ - { - RealPath: "/b/path", - VirtualPath: "/b/path", - }, - }, - Type: RpmPkg, - }, + pkg: p2, }, } diff --git a/syft/pkg/ownership_by_files_relationship.go b/syft/pkg/ownership_by_files_relationship.go index b33c79431..519ae2e27 100644 --- a/syft/pkg/ownership_by_files_relationship.go +++ b/syft/pkg/ownership_by_files_relationship.go @@ -51,6 +51,7 @@ func findOwnershipByFilesRelationships(catalog *Catalog) map[artifact.ID]map[art } for _, candidateOwnerPkg := range catalog.Sorted() { + id := candidateOwnerPkg.Identity() if candidateOwnerPkg.Metadata == nil { continue } @@ -69,17 +70,18 @@ func findOwnershipByFilesRelationships(catalog *Catalog) map[artifact.ID]map[art // look for package(s) in the catalog that may be owned by this package and mark the relationship for _, subPackage := range catalog.PackagesByPath(ownedFilePath) { - if subPackage.ID == candidateOwnerPkg.ID { + subID := subPackage.Identity() + if subID == id { continue } - if _, exists := relationships[candidateOwnerPkg.ID]; !exists { - relationships[candidateOwnerPkg.ID] = make(map[artifact.ID]*strset.Set) + if _, exists := relationships[id]; !exists { + relationships[id] = make(map[artifact.ID]*strset.Set) } - if _, exists := relationships[candidateOwnerPkg.ID][subPackage.ID]; !exists { - relationships[candidateOwnerPkg.ID][subPackage.ID] = strset.New() + if _, exists := relationships[id][subID]; !exists { + relationships[id][subID] = strset.New() } - relationships[candidateOwnerPkg.ID][subPackage.ID].Add(ownedFilePath) + relationships[id][subID].Add(ownedFilePath) } } } diff --git a/syft/pkg/ownership_by_files_relationship_test.go b/syft/pkg/ownership_by_files_relationship_test.go index d8245108b..20b94d551 100644 --- a/syft/pkg/ownership_by_files_relationship_test.go +++ b/syft/pkg/ownership_by_files_relationship_test.go @@ -3,33 +3,21 @@ package pkg import ( "testing" - "github.com/stretchr/testify/assert" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/source" + "github.com/stretchr/testify/assert" ) -type node struct { - id string -} - -func (n node) Identity() artifact.ID { - return artifact.ID(n.id) -} - func TestOwnershipByFilesRelationship(t *testing.T) { tests := []struct { - name string - pkgs []Package - expectedRelations []artifact.Relationship + name string + setup func(t testing.TB) ([]Package, []artifact.Relationship) }{ { name: "owns-by-real-path", - pkgs: []Package{ - { - ID: "parent", + setup: func(t testing.TB) ([]Package, []artifact.Relationship) { + parent := Package{ Locations: []source.Location{ { RealPath: "/a/path", @@ -49,9 +37,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) { {Path: "/d/path"}, }, }, - }, - { - ID: "child", + } + + child := Package{ Locations: []source.Location{ { RealPath: "/c/path", @@ -63,26 +51,26 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, Type: NpmPkg, - }, - }, - expectedRelations: []artifact.Relationship{ - { - From: node{"parent"}, - To: node{"child"}, + } + + relationship := artifact.Relationship{ + From: parent, + To: child, Type: artifact.OwnershipByFileOverlapRelationship, Data: ownershipByFilesMetadata{ Files: []string{ "/d/path", }, }, - }, + } + + return []Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "owns-by-virtual-path", - pkgs: []Package{ - { - ID: "parent", + setup: func(t testing.TB) ([]Package, []artifact.Relationship) { + parent := Package{ Locations: []source.Location{ { RealPath: "/a/path", @@ -102,9 +90,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) { {Path: "/another/path"}, }, }, - }, - { - ID: "child", + } + + child := Package{ Locations: []source.Location{ { RealPath: "/c/path", @@ -116,26 +104,25 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, Type: NpmPkg, - }, - }, - expectedRelations: []artifact.Relationship{ - { - From: node{"parent"}, - To: node{"child"}, + } + + relationship := artifact.Relationship{ + From: parent, + To: child, Type: artifact.OwnershipByFileOverlapRelationship, Data: ownershipByFilesMetadata{ Files: []string{ "/another/path", }, }, - }, + } + return []Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "ignore-empty-path", - pkgs: []Package{ - { - ID: "parent", + setup: func(t testing.TB) ([]Package, []artifact.Relationship) { + parent := Package{ Locations: []source.Location{ { RealPath: "/a/path", @@ -155,9 +142,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) { {Path: ""}, }, }, - }, - { - ID: "child", + } + + child := Package{ Locations: []source.Location{ { RealPath: "/c/path", @@ -169,18 +156,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, Type: NpmPkg, - }, + } + + return []Package{parent, child}, nil }, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - c := NewCatalog(test.pkgs...) + pkgs, expectedRelations := test.setup(t) + c := NewCatalog(pkgs...) relationships := ownershipByFilesRelationships(c) - assert.Len(t, relationships, len(test.expectedRelations)) - for idx, expectedRelationship := range test.expectedRelations { + assert.Len(t, relationships, len(expectedRelations)) + for idx, expectedRelationship := range expectedRelations { actualRelationship := relationships[idx] assert.Equal(t, expectedRelationship.From.Identity(), actualRelationship.From.Identity()) assert.Equal(t, expectedRelationship.To.Identity(), actualRelationship.To.Identity()) diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 062780b46..d888c5173 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -6,6 +6,8 @@ package pkg import ( "fmt" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/source" @@ -15,7 +17,6 @@ import ( // Package represents an application or library that has been bundled into a distributable format. // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? type Package struct { - ID artifact.ID `hash:"ignore"` // uniquely identifies a package Name string // the package name Version string // the version of the package FoundBy string // the specific cataloger that discovered this package @@ -31,8 +32,14 @@ type Package struct { } func (p Package) Identity() artifact.ID { - // TODO: tie this into the fingerprint system on rebase - return p.ID + f, err := p.Fingerprint() + if err != nil { + // TODO: what to do in this case? + log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err) + return "" + } + + return artifact.ID(f) } // Stringer to represent a package. diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index 4edf88dac..6cab4fdbe 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -9,7 +9,6 @@ import ( func TestFingerprint(t *testing.T) { originalPkg := Package{ - ID: "π", Name: "pi", Version: "3.14", FoundBy: "Archimedes",