Update package identifier to be constant across multiple syft runs (#595)

Signed-off-by: Christopher Angelo Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Christopher Angelo Phillips 2021-10-29 12:00:36 -04:00 committed by GitHub
parent 358b3a2cf8
commit a2882ee810
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 231 additions and 16 deletions

1
go.mod
View File

@ -24,6 +24,7 @@ require (
github.com/hashicorp/go-multierror v1.1.0
github.com/hashicorp/go-version v1.2.0
github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/hashstructure v1.1.0
github.com/mitchellh/mapstructure v1.3.1
github.com/olekukonko/tablewriter v0.0.4
github.com/pelletier/go-toml v1.8.1

2
go.sum
View File

@ -542,6 +542,8 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk
github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b/go.mod h1:r1VsdOzOPt1ZSrGZWFoNhsAedKnEd6r9Np1+5blZCWk=
github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=
github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg=
github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0=
github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA=
github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY=
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=

View File

@ -67,14 +67,14 @@ func (c *Catalog) Add(p Package) {
c.lock.Lock()
defer c.lock.Unlock()
_, exists := c.byID[p.ID]
if exists {
log.Errorf("package ID already exists in the catalog : id=%+v %+v", p.ID, p)
if p.ID == "" {
fingerprint, err := p.Fingerprint()
if err != nil {
log.Warnf("failed to add package to catalog: %w", err)
return
}
if p.ID == "" {
p.ID = newID()
p.ID = ID(fingerprint)
}
// store by package ID

View File

@ -1,12 +1,4 @@
package pkg
import (
"github.com/google/uuid"
)
// ID represents a unique value for each package added to a package catalog.
type ID string
// newID returns a fresh ID built from the string form of a newly generated UUID.
func newID() ID {
	generated := uuid.New()
	return ID(generated.String())
}

View File

@ -7,11 +7,13 @@ import (
"fmt"
"github.com/anchore/syft/syft/source"
"github.com/mitchellh/hashstructure"
)
// Package represents an application or library that has been bundled into a distributable format.
// TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places?
type Package struct {
ID ID // uniquely identifies a package, set by the cataloger
ID ID `hash:"ignore"` // uniquely identifies a package, set by the cataloger
Name string // the package name
Version string // the version of the package
FoundBy string // the specific cataloger that discovered this package
@ -30,3 +32,15 @@ type Package struct {
// String returns a short human-readable summary of the package made up of its
// type, name, and version.
func (p Package) String() string {
	const layout = "Pkg(type=%s, name=%s, version=%s)"
	return fmt.Sprintf(layout, p.Type, p.Name, p.Version)
}
// Fingerprint returns the decimal string form of a hash computed over the
// package's fields via hashstructure.Hash. Fields tagged `hash:"ignore"` are
// excluded from the hash, and slices are hashed as sets (SlicesAsSets), so
// element order does not affect the result.
//
// An error is returned (wrapping the underlying hashstructure error) when the
// package cannot be hashed; the returned string is empty in that case.
func (p Package) Fingerprint() (string, error) {
	f, err := hashstructure.Hash(p, &hashstructure.HashOptions{
		ZeroNil:      true,
		SlicesAsSets: true,
	})
	if err != nil {
		// wrap the root cause so callers can see (and unwrap) why hashing failed
		return "", fmt.Errorf("could not build package fingerprint for: %s version: %s: %w", p.Name, p.Version, err)
	}

	return fmt.Sprint(f), nil
}

206
syft/pkg/package_test.go Normal file
View File

@ -0,0 +1,206 @@
package pkg
import (
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestFingerprint is a differential test for Package.Fingerprint: each case
// applies a single transform to a copy of a baseline package and asserts that
// the resulting fingerprint is either identical to, or different from, the
// baseline fingerprint.
func TestFingerprint(t *testing.T) {
	originalPkg := Package{
		ID:      "π",
		Name:    "pi",
		Version: "3.14",
		FoundBy: "Archimedes",
		Locations: []source.Location{
			{
				RealPath:     "39.0742° N, 21.8243° E",
				VirtualPath:  "/Ancient-Greece",
				FileSystemID: "Earth",
			},
		},
		Licenses: []string{
			"cc0-1.0",
			"MIT",
		},
		Language: "math",
		Type:     PythonPkg,
		CPEs: []CPE{
			must(NewCPE(`cpe:2.3:a:Archimedes:pi:3.14:*:*:*:*:math:*:*`)),
		},
		PURL:         "pkg:pypi/pi@3.14",
		MetadataType: PythonPackageMetadataType,
		Metadata: PythonPackageMetadata{
			Name:                 "pi",
			Version:              "3.14",
			License:              "cc0-1.0",
			Author:               "Archimedes",
			AuthorEmail:          "Archimedes@circles.io",
			Platform:             "universe",
			SitePackagesRootPath: "Pi",
		},
	}

	// this is a set of differential tests, ensuring that select mutations are reflected in the fingerprint (or not)
	tests := []struct {
		name            string
		transform       func(pkg Package) Package
		expectIdentical bool
	}{
		{
			name: "go case (no transform)",
			transform: func(pkg Package) Package {
				// do nothing!
				return pkg
			},
			expectIdentical: true,
		},
		{
			// the ID field is tagged `hash:"ignore"`, so it must not feed into the fingerprint
			name: "ID is ignored",
			transform: func(pkg Package) Package {
				pkg.ID = "new!"
				return pkg
			},
			expectIdentical: true,
		},
		{
			name: "same metadata is ignored",
			transform: func(pkg Package) Package {
				// note: this is the same as the original values, just a new allocation
				pkg.Metadata = PythonPackageMetadata{
					Name:                 "pi",
					Version:              "3.14",
					License:              "cc0-1.0",
					Author:               "Archimedes",
					AuthorEmail:          "Archimedes@circles.io",
					Platform:             "universe",
					SitePackagesRootPath: "Pi",
				}
				return pkg
			},
			expectIdentical: true,
		},
		{
			name: "licenses order is ignored",
			transform: func(pkg Package) Package {
				// note: same as the original package, only a different order
				pkg.Licenses = []string{
					"MIT",
					"cc0-1.0",
				}
				return pkg
			},
			expectIdentical: true,
		},
		{
			name: "name is reflected",
			transform: func(pkg Package) Package {
				pkg.Name = "new!"
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "version is reflected",
			transform: func(pkg Package) Package {
				pkg.Version = "new!"
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "licenses is reflected",
			transform: func(pkg Package) Package {
				pkg.Licenses = []string{"new!"}
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "type is reflected",
			transform: func(pkg Package) Package {
				pkg.Type = RustPkg
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "metadata type is reflected",
			transform: func(pkg Package) Package {
				pkg.MetadataType = RustCargoPackageMetadataType
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "CPEs is reflected",
			transform: func(pkg Package) Package {
				pkg.CPEs = []CPE{}
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "pURL is reflected",
			transform: func(pkg Package) Package {
				pkg.PURL = "new!"
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "language is reflected",
			transform: func(pkg Package) Package {
				pkg.Language = Rust
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "foundBy is reflected",
			transform: func(pkg Package) Package {
				pkg.FoundBy = "new!"
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "metadata mutation is reflected",
			transform: func(pkg Package) Package {
				metadata := pkg.Metadata.(PythonPackageMetadata)
				metadata.Name = "new!"
				pkg.Metadata = metadata
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "new metadata is reflected",
			transform: func(pkg Package) Package {
				pkg.Metadata = PythonPackageMetadata{
					Name: "new!",
				}
				return pkg
			},
			expectIdentical: false,
		},
		{
			name: "nil metadata is reflected",
			transform: func(pkg Package) Package {
				pkg.Metadata = nil
				return pkg
			},
			expectIdentical: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// originalPkg is passed by value, so the baseline is never mutated by a transform
			transformedPkg := test.transform(originalPkg)
			originalFingerprint, err := originalPkg.Fingerprint()
			assert.NoError(t, err, "expected no error on package fingerprint")
			transformedFingerprint, err := transformedPkg.Fingerprint()
			assert.NoError(t, err, "expected no error on package fingerprint")

			if test.expectIdentical {
				assert.Equal(t, originalFingerprint, transformedFingerprint)
			} else {
				assert.NotEqual(t, originalFingerprint, transformedFingerprint)
			}
		})
	}
}