remove catalog ID assignment

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-11-09 10:49:37 -05:00
parent 253faf5652
commit 8a0fa5d3ad
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
11 changed files with 128 additions and 210 deletions

1
go.mod
View File

@ -24,6 +24,7 @@ require (
github.com/gookit/color v1.2.7
github.com/hashicorp/go-multierror v1.1.0
github.com/hashicorp/go-version v1.2.0
github.com/jinzhu/copier v0.3.2
github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/hashstructure v1.1.0
github.com/mitchellh/mapstructure v1.3.1

2
go.sum
View File

@ -454,6 +454,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jarcoal/httpmock v1.0.5/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik=
github.com/jingyugao/rowserrcheck v0.0.0-20191204022205-72ab7603b68a/go.mod h1:xRskid8CManxVta/ALEhJha/pweKBaVG6fWgc0yH25s=
github.com/jinzhu/copier v0.3.2 h1:QdBOCbaouLDYaIPFfi1bKv5F5tPpeTwXe4sD0jqtz5w=
github.com/jinzhu/copier v0.3.2/go.mod h1:24xnZezI2Yqac9J61UC6/dG/k76ttpq0DdJI3QmUvro=
github.com/jirfag/go-printf-func-name v0.0.0-20191110105641-45db9963cdd3/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0=
github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af/go.mod h1:HEWGJkRDzjJY2sqdDwxccsGicWEf9BQOZsq2tV+xzM0=
github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=

View File

@ -133,7 +133,6 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) {
// populate catalog with test data
catalog.Add(pkg.Package{
ID: "package-1-id",
Name: "package-1",
Version: "1.0.1",
Locations: []source.Location{
@ -154,7 +153,6 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) {
},
})
catalog.Add(pkg.Package{
ID: "package-2-id",
Name: "package-2",
Version: "2.0.1",
Locations: []source.Location{
@ -197,7 +195,6 @@ func newDirectoryCatalog() *pkg.Catalog {
// populate catalog with test data
catalog.Add(pkg.Package{
ID: "package-1-id",
Name: "package-1",
Version: "1.0.1",
Type: pkg.PythonPkg,
@ -223,7 +220,6 @@ func newDirectoryCatalog() *pkg.Catalog {
},
})
catalog.Add(pkg.Package{
ID: "package-2-id",
Name: "package-2",
Version: "2.0.1",
Type: pkg.DebPkg,

View File

@ -71,7 +71,7 @@ func toPackageModel(p *pkg.Package) model.Package {
return model.Package{
PackageBasicData: model.PackageBasicData{
ID: string(p.ID),
ID: string(p.Identity()),
Name: p.Name,
Version: p.Version,
Type: p.Type,

View File

@ -3,7 +3,6 @@ package syftjson
import (
"github.com/anchore/syft/internal/formats/syftjson/model"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
@ -62,7 +61,6 @@ func toSyftPackage(p model.Package) pkg.Package {
}
return pkg.Package{
ID: artifact.ID(p.ID),
Name: p.Name,
Version: p.Version,
FoundBy: p.FoundBy,

View File

@ -5,6 +5,7 @@ import (
"sync"
"github.com/anchore/syft/syft/artifact"
"github.com/jinzhu/copier"
"github.com/anchore/syft/internal"
@ -13,7 +14,7 @@ import (
// Catalog represents a collection of Packages.
type Catalog struct {
byID map[artifact.ID]*Package
byID map[artifact.ID]Package
idsByType map[Type][]artifact.ID
idsByPath map[string][]artifact.ID // note: this is real path or virtual path
lock sync.RWMutex
@ -22,7 +23,7 @@ type Catalog struct {
// NewCatalog returns a new empty Catalog
func NewCatalog(pkgs ...Package) *Catalog {
catalog := Catalog{
byID: make(map[artifact.ID]*Package),
byID: make(map[artifact.ID]Package),
idsByType: make(map[Type][]artifact.ID),
idsByPath: make(map[string][]artifact.ID),
}
@ -45,16 +46,21 @@ func (c *Catalog) Package(id artifact.ID) *Package {
if !exists {
return nil
}
return v
var p Package
if err := copier.Copy(&p, &v); err != nil {
log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err)
return nil
}
return &p
}
// PackagesByPath returns all packages that were discovered from the given path.
func (c *Catalog) PackagesByPath(path string) []*Package {
func (c *Catalog) PackagesByPath(path string) []Package {
return c.Packages(c.idsByPath[path])
}
// Packages returns all packages for the given ID.
func (c *Catalog) Packages(ids []artifact.ID) (result []*Package) {
func (c *Catalog) Packages(ids []artifact.ID) (result []Package) {
for _, i := range ids {
p, exists := c.byID[i]
if exists {
@ -69,68 +75,32 @@ func (c *Catalog) Add(p Package) {
c.lock.Lock()
defer c.lock.Unlock()
if p.ID == "" {
fingerprint, err := p.Fingerprint()
if err != nil {
log.Warnf("failed to add package to catalog: %w", err)
return
}
p.ID = artifact.ID(fingerprint)
}
// note: since we are capturing the ID, we cannot modify the package being added from this point forward
id := p.Identity()
// store by package ID
c.byID[p.ID] = &p
c.byID[id] = p
// store by package type
c.idsByType[p.Type] = append(c.idsByType[p.Type], p.ID)
c.idsByType[p.Type] = append(c.idsByType[p.Type], id)
// store by file location paths
observedPaths := internal.NewStringSet()
for _, l := range p.Locations {
if l.RealPath != "" && !observedPaths.Contains(l.RealPath) {
c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], p.ID)
c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], id)
observedPaths.Add(l.RealPath)
}
if l.VirtualPath != "" && l.RealPath != l.VirtualPath && !observedPaths.Contains(l.VirtualPath) {
c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], p.ID)
c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], id)
observedPaths.Add(l.VirtualPath)
}
}
}
func (c *Catalog) Remove(id artifact.ID) {
c.lock.Lock()
defer c.lock.Unlock()
_, exists := c.byID[id]
if !exists {
log.Errorf("package ID does not exist in the catalog : id=%+v", id)
return
}
// Remove all index references to this package ID
for t, ids := range c.idsByType {
c.idsByType[t] = removeID(id, ids)
if len(c.idsByType[t]) == 0 {
delete(c.idsByType, t)
}
}
for p, ids := range c.idsByPath {
c.idsByPath[p] = removeID(id, ids)
if len(c.idsByPath[p]) == 0 {
delete(c.idsByPath, p)
}
}
// Remove package
delete(c.byID, id)
}
// Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
func (c *Catalog) Enumerate(types ...Type) <-chan *Package {
channel := make(chan *Package)
func (c *Catalog) Enumerate(types ...Type) <-chan Package {
channel := make(chan Package)
go func() {
defer close(channel)
for ty, ids := range c.idsByType {
@ -148,7 +118,10 @@ func (c *Catalog) Enumerate(types ...Type) <-chan *Package {
}
}
for _, id := range ids {
channel <- c.Package(id)
p := c.Package(id)
if p != nil {
channel <- *p
}
}
}
}()
@ -157,8 +130,7 @@ func (c *Catalog) Enumerate(types ...Type) <-chan *Package {
// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type
// is specified.
func (c *Catalog) Sorted(types ...Type) []*Package {
pkgs := make([]*Package, 0)
func (c *Catalog) Sorted(types ...Type) (pkgs []Package) {
for p := range c.Enumerate(types...) {
pkgs = append(pkgs, p)
}
@ -178,12 +150,3 @@ func (c *Catalog) Sorted(types ...Type) []*Package {
return pkgs
}
func removeID(id artifact.ID, target []artifact.ID) (result []artifact.ID) {
for _, value := range target {
if value != id {
result = append(result, value)
}
}
return result
}

View File

@ -3,8 +3,6 @@ package pkg
import (
"testing"
"github.com/anchore/syft/syft/artifact"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/source"
@ -12,7 +10,6 @@ import (
var catalogAddAndRemoveTestPkgs = []Package{
{
ID: "my-id",
Locations: []source.Location{
{
RealPath: "/a/path",
@ -26,7 +23,6 @@ var catalogAddAndRemoveTestPkgs = []Package{
Type: RpmPkg,
},
{
ID: "my-other-id",
Locations: []source.Location{
{
RealPath: "/c/path",
@ -47,6 +43,11 @@ type expectedIndexes struct {
}
func TestCatalogAddPopulatesIndex(t *testing.T) {
fixtureID := func(i int) string {
return string(catalogAddAndRemoveTestPkgs[i].Identity())
}
tests := []struct {
name string
pkgs []Package
@ -57,16 +58,16 @@ func TestCatalogAddPopulatesIndex(t *testing.T) {
pkgs: catalogAddAndRemoveTestPkgs,
expectedIndexes: expectedIndexes{
byType: map[Type]*strset.Set{
RpmPkg: strset.New("my-id"),
NpmPkg: strset.New("my-other-id"),
RpmPkg: strset.New(fixtureID(0)),
NpmPkg: strset.New(fixtureID(1)),
},
byPath: map[string]*strset.Set{
"/another/path": strset.New("my-id", "my-other-id"),
"/a/path": strset.New("my-id"),
"/b/path": strset.New("my-id"),
"/bee/path": strset.New("my-id"),
"/c/path": strset.New("my-other-id"),
"/d/path": strset.New("my-other-id"),
"/another/path": strset.New(fixtureID(0), fixtureID(1)),
"/a/path": strset.New(fixtureID(0)),
"/b/path": strset.New(fixtureID(0)),
"/bee/path": strset.New(fixtureID(0)),
"/c/path": strset.New(fixtureID(1)),
"/d/path": strset.New(fixtureID(1)),
},
},
},
@ -82,50 +83,6 @@ func TestCatalogAddPopulatesIndex(t *testing.T) {
}
}
func TestCatalogRemove(t *testing.T) {
tests := []struct {
name string
pkgs []Package
removeId artifact.ID
expectedIndexes expectedIndexes
}{
{
name: "vanilla-add",
removeId: "my-other-id",
pkgs: catalogAddAndRemoveTestPkgs,
expectedIndexes: expectedIndexes{
byType: map[Type]*strset.Set{
RpmPkg: strset.New("my-id"),
},
byPath: map[string]*strset.Set{
"/another/path": strset.New("my-id"),
"/a/path": strset.New("my-id"),
"/b/path": strset.New("my-id"),
"/bee/path": strset.New("my-id"),
},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := NewCatalog(test.pkgs...)
c.Remove(test.removeId)
assertIndexes(t, c, test.expectedIndexes)
if c.Package(test.removeId) != nil {
t.Errorf("expected package to be removed, but was found!")
}
if c.PackageCount() != len(test.pkgs)-1 {
t.Errorf("expected count to be affected but was not")
}
})
}
}
func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
// assert path index
if len(c.idsByPath) != len(expectedIndexes.byPath) {
@ -134,7 +91,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
for path, expectedIds := range expectedIndexes.byPath {
actualIds := strset.New()
for _, p := range c.PackagesByPath(path) {
actualIds.Add(string(p.ID))
actualIds.Add(string(p.Identity()))
}
if !expectedIds.IsEqual(actualIds) {
@ -149,7 +106,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
for ty, expectedIds := range expectedIndexes.byType {
actualIds := strset.New()
for p := range c.Enumerate(ty) {
actualIds.Add(string(p.ID))
actualIds.Add(string(p.Identity()))
}
if !expectedIds.IsEqual(actualIds) {
@ -159,14 +116,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
}
func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) {
tests := []struct {
name string
pkg Package
}{
{
name: "multiple locations with shared path",
pkg: Package{
ID: "my-id",
p1 := Package{
Locations: []source.Location{
{
RealPath: "/b/path",
@ -178,12 +128,10 @@ func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) {
},
},
Type: RpmPkg,
},
},
{
name: "one location with shared path",
pkg: Package{
ID: "my-id",
Name: "Package-1",
}
p2 := Package{
Locations: []source.Location{
{
RealPath: "/b/path",
@ -191,7 +139,19 @@ func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) {
},
},
Type: RpmPkg,
Name: "Package-2",
}
tests := []struct {
name string
pkg Package
}{
{
name: "multiple locations with shared path",
pkg: p1,
},
{
name: "one location with shared path",
pkg: p2,
},
}

View File

@ -51,6 +51,7 @@ func findOwnershipByFilesRelationships(catalog *Catalog) map[artifact.ID]map[art
}
for _, candidateOwnerPkg := range catalog.Sorted() {
id := candidateOwnerPkg.Identity()
if candidateOwnerPkg.Metadata == nil {
continue
}
@ -69,17 +70,18 @@ func findOwnershipByFilesRelationships(catalog *Catalog) map[artifact.ID]map[art
// look for package(s) in the catalog that may be owned by this package and mark the relationship
for _, subPackage := range catalog.PackagesByPath(ownedFilePath) {
if subPackage.ID == candidateOwnerPkg.ID {
subID := subPackage.Identity()
if subID == id {
continue
}
if _, exists := relationships[candidateOwnerPkg.ID]; !exists {
relationships[candidateOwnerPkg.ID] = make(map[artifact.ID]*strset.Set)
if _, exists := relationships[id]; !exists {
relationships[id] = make(map[artifact.ID]*strset.Set)
}
if _, exists := relationships[candidateOwnerPkg.ID][subPackage.ID]; !exists {
relationships[candidateOwnerPkg.ID][subPackage.ID] = strset.New()
if _, exists := relationships[id][subID]; !exists {
relationships[id][subID] = strset.New()
}
relationships[candidateOwnerPkg.ID][subPackage.ID].Add(ownedFilePath)
relationships[id][subID].Add(ownedFilePath)
}
}
}

View File

@ -3,33 +3,21 @@ package pkg
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
type node struct {
id string
}
func (n node) Identity() artifact.ID {
return artifact.ID(n.id)
}
func TestOwnershipByFilesRelationship(t *testing.T) {
tests := []struct {
name string
pkgs []Package
expectedRelations []artifact.Relationship
setup func(t testing.TB) ([]Package, []artifact.Relationship)
}{
{
name: "owns-by-real-path",
pkgs: []Package{
{
ID: "parent",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
Locations: []source.Location{
{
RealPath: "/a/path",
@ -49,9 +37,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
{Path: "/d/path"},
},
},
},
{
ID: "child",
}
child := Package{
Locations: []source.Location{
{
RealPath: "/c/path",
@ -63,26 +51,26 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
},
},
Type: NpmPkg,
},
},
expectedRelations: []artifact.Relationship{
{
From: node{"parent"},
To: node{"child"},
}
relationship := artifact.Relationship{
From: parent,
To: child,
Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{
Files: []string{
"/d/path",
},
},
},
}
return []Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "owns-by-virtual-path",
pkgs: []Package{
{
ID: "parent",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
Locations: []source.Location{
{
RealPath: "/a/path",
@ -102,9 +90,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
{Path: "/another/path"},
},
},
},
{
ID: "child",
}
child := Package{
Locations: []source.Location{
{
RealPath: "/c/path",
@ -116,26 +104,25 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
},
},
Type: NpmPkg,
},
},
expectedRelations: []artifact.Relationship{
{
From: node{"parent"},
To: node{"child"},
}
relationship := artifact.Relationship{
From: parent,
To: child,
Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{
Files: []string{
"/another/path",
},
},
},
}
return []Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "ignore-empty-path",
pkgs: []Package{
{
ID: "parent",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
Locations: []source.Location{
{
RealPath: "/a/path",
@ -155,9 +142,9 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
{Path: ""},
},
},
},
{
ID: "child",
}
child := Package{
Locations: []source.Location{
{
RealPath: "/c/path",
@ -169,18 +156,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
},
},
Type: NpmPkg,
},
}
return []Package{parent, child}, nil
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := NewCatalog(test.pkgs...)
pkgs, expectedRelations := test.setup(t)
c := NewCatalog(pkgs...)
relationships := ownershipByFilesRelationships(c)
assert.Len(t, relationships, len(test.expectedRelations))
for idx, expectedRelationship := range test.expectedRelations {
assert.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations {
actualRelationship := relationships[idx]
assert.Equal(t, expectedRelationship.From.Identity(), actualRelationship.From.Identity())
assert.Equal(t, expectedRelationship.To.Identity(), actualRelationship.To.Identity())

View File

@ -6,6 +6,8 @@ package pkg
import (
"fmt"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/source"
@ -15,7 +17,6 @@ import (
// Package represents an application or library that has been bundled into a distributable format.
// TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places?
type Package struct {
ID artifact.ID `hash:"ignore"` // uniquely identifies a package
Name string // the package name
Version string // the version of the package
FoundBy string // the specific cataloger that discovered this package
@ -31,8 +32,14 @@ type Package struct {
}
func (p Package) Identity() artifact.ID {
// TODO: tie this into the fingerprint system on rebase
return p.ID
f, err := p.Fingerprint()
if err != nil {
// TODO: what to do in this case?
log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err)
return ""
}
return artifact.ID(f)
}
// Stringer to represent a package.

View File

@ -9,7 +9,6 @@ import (
func TestFingerprint(t *testing.T) {
originalPkg := Package{
ID: "π",
Name: "pi",
Version: "3.14",
FoundBy: "Archimedes",