From 52adfcbd446b2d0d5212cd042eec21fb13df4cf1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 9 Nov 2021 12:31:39 -0500 Subject: [PATCH] stabilize package ID relative to encode-decode format cycles Signed-off-by: Alex Goodman --- go.mod | 2 +- go.sum | 4 +- .../formats/cyclonedx12xml/to_format_model.go | 2 +- .../formats/spdx22tagvalue/to_format_model.go | 2 +- internal/formats/syftjson/decoder_test.go | 6 +-- .../snapshot/TestDirectoryPresenter.golden | 4 +- .../snapshot/TestImagePresenter.golden | 4 +- internal/formats/syftjson/to_format_model.go | 2 +- .../poweruser/json_presenter_test.go | 2 - .../snapshot/TestJSONPresenter.golden | 4 +- syft/pkg/cataloger/common/cpe/generate.go | 2 +- .../pkg/cataloger/common/cpe/generate_test.go | 2 +- syft/pkg/cpe.go | 3 ++ syft/pkg/cpe_test.go | 36 ++++++++++++- syft/pkg/java_metadata.go | 2 +- syft/pkg/package.go | 8 ++- syft/source/location.go | 9 ++-- .../pkgs/project/package-lock.json | 52 ------------------- .../integration/encode_decode_cycle_test.go | 32 ++++++------ 19 files changed, 78 insertions(+), 100 deletions(-) delete mode 100644 syft/test-fixtures/pkgs/project/package-lock.json rename syft/encode_decode_test.go => test/integration/encode_decode_cycle_test.go (68%) diff --git a/go.mod b/go.mod index 747686789..ea7804280 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/hashicorp/go-version v1.2.0 github.com/jinzhu/copier v0.3.2 github.com/mitchellh/go-homedir v1.1.0 - github.com/mitchellh/hashstructure v1.1.0 + github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/mitchellh/mapstructure v1.3.1 github.com/olekukonko/tablewriter v0.0.4 github.com/pelletier/go-toml v1.8.1 diff --git a/go.sum b/go.sum index 9a21d79be..e373a0c9c 100644 --- a/go.sum +++ b/go.sum @@ -546,8 +546,8 @@ github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b/go.mod h1:r1VsdOzOPt1ZSrGZWFoNhsAedKnEd6r9Np1+5blZCWk= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0= -github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA= +github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4= +github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= diff --git a/internal/formats/cyclonedx12xml/to_format_model.go b/internal/formats/cyclonedx12xml/to_format_model.go index 70744662a..6a6c43703 100644 --- a/internal/formats/cyclonedx12xml/to_format_model.go +++ b/internal/formats/cyclonedx12xml/to_format_model.go @@ -33,7 +33,7 @@ func toFormatModel(s sbom.SBOM) model.Document { return doc } -func toComponent(p *pkg.Package) model.Component { +func toComponent(p pkg.Package) model.Component { return model.Component{ Type: "library", // TODO: this is not accurate Name: p.Name, diff --git a/internal/formats/spdx22tagvalue/to_format_model.go b/internal/formats/spdx22tagvalue/to_format_model.go index 44c728565..4e9fb5d1f 100644 --- a/internal/formats/spdx22tagvalue/to_format_model.go +++ b/internal/formats/spdx22tagvalue/to_format_model.go @@ -256,7 +256,7 @@ func toFormatPackages(catalog *pkg.Catalog) map[spdx.ElementID]*spdx.Package2_2 return results } -func formatSPDXExternalRefs(p *pkg.Package) (refs []*spdx.PackageExternalReference2_2) { +func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference2_2) { for _, ref := range spdxhelpers.ExternalRefs(p) { refs = append(refs, &spdx.PackageExternalReference2_2{ Category: string(ref.ReferenceCategory), diff --git a/internal/formats/syftjson/decoder_test.go b/internal/formats/syftjson/decoder_test.go index bd537c366..ea69ee2c7 100644 --- a/internal/formats/syftjson/decoder_test.go +++ b/internal/formats/syftjson/decoder_test.go @@ -31,11 +31,7 @@ func TestEncodeDecodeCycle(t *testing.T) { continue } - // ids will never be equal - p.ID = "" - actualPackages[idx].ID = "" - - for _, d := range deep.Equal(*p, *actualPackages[idx]) { + for _, d := range deep.Equal(p, actualPackages[idx]) { if strings.Contains(d, ".VirtualPath: ") { // location.Virtual path is not exposed in the json output continue diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden index 7289b4213..5216ffdc2 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "package-1-id", + "id": "810333194629225077", "name": "package-1", "version": "1.0.1", "type": "python", @@ -36,7 +36,7 @@ } }, { - "id": "package-2-id", + "id": "1889729387356865209", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden index 42fce092a..fee0eba15 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "package-1-id", + "id": "15119766234833480967", "name": "package-1", "version": "1.0.1", "type": "python", @@ -32,7 +32,7 @@ } }, { - "id": "package-2-id", + "id": "3293866126252599174", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/to_format_model.go b/internal/formats/syftjson/to_format_model.go index 5140891b6..5e2c4f362 100644 --- a/internal/formats/syftjson/to_format_model.go +++ b/internal/formats/syftjson/to_format_model.go @@ -52,7 +52,7 @@ func toPackageModels(catalog *pkg.Catalog) []model.Package { } // toPackageModel crates a new Package from the given pkg.Package. -func toPackageModel(p *pkg.Package) model.Package { +func toPackageModel(p pkg.Package) model.Package { var cpes = make([]string, len(p.CPEs)) for i, c := range p.CPEs { cpes[i] = c.BindToFmtString() diff --git a/internal/presenter/poweruser/json_presenter_test.go b/internal/presenter/poweruser/json_presenter_test.go index f4dbda5be..40b0f40c3 100644 --- a/internal/presenter/poweruser/json_presenter_test.go +++ b/internal/presenter/poweruser/json_presenter_test.go @@ -33,7 +33,6 @@ func TestJSONPresenter(t *testing.T) { catalog := pkg.NewCatalog() catalog.Add(pkg.Package{ - ID: "package-1-id", Name: "package-1", Version: "1.0.1", Locations: []source.Location{ @@ -57,7 +56,6 @@ func TestJSONPresenter(t *testing.T) { }, }) catalog.Add(pkg.Package{ - ID: "package-2-id", Name: "package-2", Version: "2.0.1", Locations: []source.Location{ diff --git a/internal/presenter/poweruser/test-fixtures/snapshot/TestJSONPresenter.golden b/internal/presenter/poweruser/test-fixtures/snapshot/TestJSONPresenter.golden index d71439257..3a3665919 100644 --- a/internal/presenter/poweruser/test-fixtures/snapshot/TestJSONPresenter.golden +++ b/internal/presenter/poweruser/test-fixtures/snapshot/TestJSONPresenter.golden @@ -72,7 +72,7 @@ ], "artifacts": [ { - "id": "package-1-id", + "id": "13280550215267739407", "name": "package-1", "version": "1.0.1", "type": "python", @@ -102,7 +102,7 @@ } }, { - "id": "package-2-id", + "id": "7356949319602771519", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/syft/pkg/cataloger/common/cpe/generate.go b/syft/pkg/cataloger/common/cpe/generate.go index 46febb2df..daa1820c1 100644 --- a/syft/pkg/cataloger/common/cpe/generate.go +++ b/syft/pkg/cataloger/common/cpe/generate.go @@ -83,7 +83,7 @@ func candidateVendors(p pkg.Package) []string { // allow * as a candidate. Note: do NOT allow Java packages to have * vendors. switch p.Language { case pkg.Ruby, pkg.JavaScript: - vendors.addValue("*") + vendors.addValue(wfn.Any) } switch p.MetadataType { diff --git a/syft/pkg/cataloger/common/cpe/generate_test.go b/syft/pkg/cataloger/common/cpe/generate_test.go index 884c2c244..9f3eb7dd0 100644 --- a/syft/pkg/cataloger/common/cpe/generate_test.go +++ b/syft/pkg/cataloger/common/cpe/generate_test.go @@ -637,7 +637,7 @@ func TestCandidateProducts(t *testing.T) { } for _, test := range tests { - t.Run(fmt.Sprintf("%+v %+v", test.p, test.expected), func(t *testing.T) { + t.Run(test.name, func(t *testing.T) { assert.ElementsMatch(t, test.expected, candidateProducts(test.p)) }) } diff --git a/syft/pkg/cpe.go b/syft/pkg/cpe.go index 52a86adce..c009f4e29 100644 --- a/syft/pkg/cpe.go +++ b/syft/pkg/cpe.go @@ -45,5 +45,8 @@ func MustCPE(cpeStr string) CPE { func normalizeCpeField(field string) string { // keep dashes and forward slashes unescaped + if field == "*" { + return wfn.Any + } return strings.ReplaceAll(wfn.StripSlashes(field), `\/`, "/") } diff --git a/syft/pkg/cpe_test.go b/syft/pkg/cpe_test.go index d06efa34b..cf0a1e1f7 100644 --- a/syft/pkg/cpe_test.go +++ b/syft/pkg/cpe_test.go @@ -1,6 +1,10 @@ package pkg -import "testing" +import ( + "testing" + + "github.com/stretchr/testify/assert" +) func must(c CPE, e error) CPE { if e != nil { @@ -46,3 +50,33 @@ func TestNewCPE(t *testing.T) { }) } } + +func Test_normalizeCpeField(t *testing.T) { + + tests := []struct { + field string + expected string + }{ + { + field: "something", + expected: "something", + }, + { + field: "some\\thing", + expected: `some\thing`, + }, + { + field: "*", + expected: "", + }, + { + field: "", + expected: "", + }, + } + for _, test := range tests { + t.Run(test.field, func(t *testing.T) { + assert.Equal(t, test.expected, normalizeCpeField(test.field)) + }) + } +} diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index 641b9294f..ceaf476e9 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -21,7 +21,7 @@ type JavaMetadata struct { Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` - Parent *Package `json:"-"` + Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy). } // PomProperties represents the fields of interest extracted from a Java archive's pom.properties file. diff --git a/syft/pkg/package.go b/syft/pkg/package.go index d888c5173..15cd04551 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -7,11 +7,9 @@ import ( "fmt" "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/source" - "github.com/mitchellh/hashstructure" + "github.com/mitchellh/hashstructure/v2" ) // Package represents an application or library that has been bundled into a distributable format. @@ -48,8 +46,8 @@ func (p Package) String() string { } func (p Package) Fingerprint() (string, error) { - f, err := hashstructure.Hash(p, &hashstructure.HashOptions{ - ZeroNil: true, + f, err := hashstructure.Hash(p, hashstructure.FormatV2, &hashstructure.HashOptions{ + //ZeroNil: true, SlicesAsSets: true, }) if err != nil { diff --git a/syft/source/location.go b/syft/source/location.go index f7cb729f3..a74f59b39 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -10,12 +10,15 @@ import ( ) // Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key -// in content fetching to uniquely identify a file relative to a request (the VirtualPath). +// in content fetching to uniquely identify a file relative to a request (the VirtualPath). Note that the VirtualPath +// and ref are ignored fields when using github.com/mitchellh/hashstructure. The reason for this is to ensure that +// only the minimally expressible fields of a location are baked into the uniqueness of a Location. Since VirutalPath +// and ref are not captured in JSON output they cannot be included in this minimal definition. type Location struct { RealPath string `json:"path"` // The path where all path ancestors have no hardlinks / symlinks - VirtualPath string `json:"-"` // The path to the file which may or may not have hardlinks / symlinks + VirtualPath string `hash:"ignore" json:"-"` // The path to the file which may or may not have hardlinks / symlinks FileSystemID string `json:"layerID,omitempty"` // An ID representing the filesystem. For container images this is a layer digest, directories or root filesystem this is blank. - ref file.Reference // The file reference relative to the stereoscope.FileCatalog that has more information about this location. + ref file.Reference `hash:"ignore"` // The file reference relative to the stereoscope.FileCatalog that has more information about this location. } // NewLocation creates a new Location representing a path without denoting a filesystem or FileCatalog reference. diff --git a/syft/test-fixtures/pkgs/project/package-lock.json b/syft/test-fixtures/pkgs/project/package-lock.json deleted file mode 100644 index f18505de8..000000000 --- a/syft/test-fixtures/pkgs/project/package-lock.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "name": "npm-lock", - "version": "1.0.0", - "lockfileVersion": 1, - "requires": true, - "dependencies": { - "collapse-white-space": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/collapse-white-space/-/collapse-white-space-2.0.0.tgz", - "integrity": "sha512-eh9krktAIMDL0KHuN7WTBJ/0PMv8KUvfQRBkIlGmW61idRM2DJjgd1qXEPr4wyk2PimZZeNww3RVYo6CMvDGlg==" - }, - "end-of-stream": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", - "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "dev": true, - "requires": { - "once": "^1.4.0" - } - }, - "insert-css": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/insert-css/-/insert-css-2.0.0.tgz", - "integrity": "sha1-610Ql7dUL0x56jBg067gfQU4gPQ=" - }, - "once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", - "dev": true, - "requires": { - "wrappy": "1" - } - }, - "pump": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", - "dev": true, - "requires": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=", - "dev": true - } - } -} diff --git a/syft/encode_decode_test.go b/test/integration/encode_decode_cycle_test.go similarity index 68% rename from syft/encode_decode_test.go rename to test/integration/encode_decode_cycle_test.go index d070b206b..6d31b361f 100644 --- a/syft/encode_decode_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -1,15 +1,16 @@ -package syft +package integration import ( "bytes" "testing" + "github.com/anchore/syft/syft" + + "github.com/sergi/go-diff/diffmatchpatch" + "github.com/anchore/syft/syft/sbom" - "github.com/go-test/deep" - "github.com/anchore/syft/syft/format" - "github.com/anchore/syft/syft/source" "github.com/stretchr/testify/assert" ) @@ -20,7 +21,6 @@ import ( // to do an object-to-object comparison. For this reason this test focuses on a bytes-to-bytes comparison after an // encode-decode-encode loop which will detect lossy behavior in both directions. func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { - testImage := "image-simple" tests := []struct { format format.Option }{ @@ -29,13 +29,9 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { }, } for _, test := range tests { - t.Run(testImage, func(t *testing.T) { + t.Run(string(test.format), func(t *testing.T) { - src, err := source.NewFromDirectory("./test-fixtures/pkgs") - if err != nil { - t.Fatalf("cant get dir") - } - originalCatalog, _, d, err := CatalogPackages(&src, source.SquashedScope) + originalCatalog, _, d, src := catalogFixtureImage(t, "image-pkg-coverage") originalSBOM := sbom.SBOM{ Artifacts: sbom.Artifacts{ @@ -45,19 +41,21 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { Source: src.Metadata, } - by1, err := Encode(originalSBOM, test.format) + by1, err := syft.Encode(originalSBOM, test.format) assert.NoError(t, err) - newSBOM, newFormat, err := Decode(bytes.NewReader(by1)) + newSBOM, newFormat, err := syft.Decode(bytes.NewReader(by1)) assert.NoError(t, err) assert.Equal(t, test.format, newFormat) - by2, err := Encode(*newSBOM, test.format) + by2, err := syft.Encode(*newSBOM, test.format) assert.NoError(t, err) - for _, diff := range deep.Equal(by1, by2) { - t.Errorf(diff) + + if !assert.True(t, bytes.Equal(by1, by2)) { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(string(by1), string(by2), true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) } - assert.True(t, bytes.Equal(by1, by2)) }) } }