From a2882ee810edcbf4db3230e982c316b261eb17cd Mon Sep 17 00:00:00 2001 From: Christopher Angelo Phillips <32073428+spiffcs@users.noreply.github.com> Date: Fri, 29 Oct 2021 12:00:36 -0400 Subject: [PATCH] Update package identifier to be constant across multiple syft run (#595) Signed-off-by: Christopher Angelo Phillips --- go.mod | 1 + go.sum | 2 + syft/pkg/catalog.go | 14 +-- syft/pkg/id.go | 8 -- syft/pkg/package.go | 16 ++- syft/pkg/package_test.go | 206 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 231 insertions(+), 16 deletions(-) create mode 100644 syft/pkg/package_test.go diff --git a/go.mod b/go.mod index 1de5b447e..6ced4fdbd 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/hashicorp/go-multierror v1.1.0 github.com/hashicorp/go-version v1.2.0 github.com/mitchellh/go-homedir v1.1.0 + github.com/mitchellh/hashstructure v1.1.0 github.com/mitchellh/mapstructure v1.3.1 github.com/olekukonko/tablewriter v0.0.4 github.com/pelletier/go-toml v1.8.1 diff --git a/go.sum b/go.sum index 54b564073..0a24fea7f 100644 --- a/go.sum +++ b/go.sum @@ -542,6 +542,8 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b/go.mod h1:r1VsdOzOPt1ZSrGZWFoNhsAedKnEd6r9Np1+5blZCWk= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= +github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0= +github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= diff 
--git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index 7f1802e24..66c37c9c3 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -67,14 +67,14 @@ func (c *Catalog) Add(p Package) { c.lock.Lock() defer c.lock.Unlock() - _, exists := c.byID[p.ID] - if exists { - log.Errorf("package ID already exists in the catalog : id=%+v %+v", p.ID, p) - return - } - if p.ID == "" { - p.ID = newID() + fingerprint, err := p.Fingerprint() + if err != nil { + log.Warnf("failed to add package to catalog: %v", err) + return + } + + p.ID = ID(fingerprint) } // store by package ID diff --git a/syft/pkg/id.go b/syft/pkg/id.go index d152209d0..7b3e6b2d7 100644 --- a/syft/pkg/id.go +++ b/syft/pkg/id.go @@ -1,12 +1,4 @@ package pkg -import ( - "github.com/google/uuid" -) - // ID represents a unique value for each package added to a package catalog. type ID string - -func newID() ID { - return ID(uuid.New().String()) -} diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 8ae228a21..08c5ebbdb 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -7,11 +7,13 @@ import ( "fmt" "github.com/anchore/syft/syft/source" + "github.com/mitchellh/hashstructure" ) // Package represents an application or library that has been bundled into a distributable format. +// TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? 
type Package struct { - ID ID // uniquely identifies a package, set by the cataloger + ID ID `hash:"ignore"` // uniquely identifies a package, set by the cataloger Name string // the package name Version string // the version of the package FoundBy string // the specific cataloger that discovered this package @@ -30,3 +32,15 @@ type Package struct { func (p Package) String() string { return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s)", p.Type, p.Name, p.Version) } + +func (p Package) Fingerprint() (string, error) { + f, err := hashstructure.Hash(p, &hashstructure.HashOptions{ + ZeroNil: true, + SlicesAsSets: true, + }) + if err != nil { + return "", fmt.Errorf("could not build package fingerprint for: %s version: %s: %w", p.Name, p.Version, err) + } + + return fmt.Sprint(f), nil +} diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go new file mode 100644 index 000000000..4edf88dac --- /dev/null +++ b/syft/pkg/package_test.go @@ -0,0 +1,206 @@ +package pkg + +import ( + "testing" + + "github.com/anchore/syft/syft/source" + "github.com/stretchr/testify/assert" +) + +func TestFingerprint(t *testing.T) { + originalPkg := Package{ + ID: "π", + Name: "pi", + Version: "3.14", + FoundBy: "Archimedes", + Locations: []source.Location{ + { + RealPath: "39.0742° N, 21.8243° E", + VirtualPath: "/Ancient-Greece", + FileSystemID: "Earth", + }, + }, + Licenses: []string{ + "cc0-1.0", + "MIT", + }, + Language: "math", + Type: PythonPkg, + CPEs: []CPE{ + must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)), + }, + PURL: "pkg:pypi/pi@3.14", + MetadataType: PythonPackageMetadataType, + Metadata: PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + }, + } + + // this is a set of differential tests, ensuring that select mutations are reflected in the fingerprint (or not) + tests := []struct { + name string + transform func(pkg 
Package) Package + expectIdentical bool + }{ + { + name: "go case (no transform)", + transform: func(pkg Package) Package { + // do nothing! + return pkg + }, + expectIdentical: true, + }, + { + name: "same metadata is ignored", + transform: func(pkg Package) Package { + // note: this is the same as the original values, just a new allocation + pkg.Metadata = PythonPackageMetadata{ + Name: "pi", + Version: "3.14", + License: "cc0-1.0", + Author: "Archimedes", + AuthorEmail: "Archimedes@circles.io", + Platform: "universe", + SitePackagesRootPath: "Pi", + } + return pkg + }, + expectIdentical: true, + }, + { + name: "licenses order is ignored", + transform: func(pkg Package) Package { + // note: same as the original package, only a different order + pkg.Licenses = []string{ + "MIT", + "cc0-1.0", + } + return pkg + }, + expectIdentical: true, + }, + { + name: "name is reflected", + transform: func(pkg Package) Package { + pkg.Name = "new!" + return pkg + }, + expectIdentical: false, + }, + { + name: "version is reflected", + transform: func(pkg Package) Package { + pkg.Version = "new!" + return pkg + }, + expectIdentical: false, + }, + { + name: "licenses is reflected", + transform: func(pkg Package) Package { + pkg.Licenses = []string{"new!"} + return pkg + }, + expectIdentical: false, + }, + { + name: "type is reflected", + transform: func(pkg Package) Package { + pkg.Type = RustPkg + return pkg + }, + expectIdentical: false, + }, + { + name: "metadata type is reflected", + transform: func(pkg Package) Package { + pkg.MetadataType = RustCargoPackageMetadataType + return pkg + }, + expectIdentical: false, + }, + { + name: "CPEs is reflected", + transform: func(pkg Package) Package { + pkg.CPEs = []CPE{} + return pkg + }, + expectIdentical: false, + }, + { + name: "pURL is reflected", + transform: func(pkg Package) Package { + pkg.PURL = "new!" 
+ return pkg + }, + expectIdentical: false, + }, + { + name: "language is reflected", + transform: func(pkg Package) Package { + pkg.Language = Rust + return pkg + }, + expectIdentical: false, + }, + { + name: "foundBy is reflected", + transform: func(pkg Package) Package { + pkg.FoundBy = "new!" + return pkg + }, + expectIdentical: false, + }, + { + name: "metadata mutation is reflected", + transform: func(pkg Package) Package { + metadata := pkg.Metadata.(PythonPackageMetadata) + metadata.Name = "new!" + pkg.Metadata = metadata + return pkg + }, + expectIdentical: false, + }, + { + name: "new metadata is reflected", + transform: func(pkg Package) Package { + pkg.Metadata = PythonPackageMetadata{ + Name: "new!", + } + return pkg + }, + expectIdentical: false, + }, + { + name: "nil metadata is reflected", + transform: func(pkg Package) Package { + pkg.Metadata = nil + return pkg + }, + expectIdentical: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + transformedPkg := test.transform(originalPkg) + originalFingerprint, err := originalPkg.Fingerprint() + assert.NoError(t, err, "expected no error on package fingerprint") + transformedFingerprint, err := transformedPkg.Fingerprint() + assert.NoError(t, err, "expected no error on package fingerprint") + + if test.expectIdentical { + assert.Equal(t, originalFingerprint, transformedFingerprint) + } else { + assert.NotEqual(t, originalFingerprint, transformedFingerprint) + } + + }) + } +}