diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden index 3791b5d4f..fec66e759 100644 --- a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden @@ -3,7 +3,7 @@ "name": "/some/path", "spdxVersion": "SPDX-2.2", "creationInfo": { - "created": "2021-12-01T15:08:29.469369Z", + "created": "2021-12-15T23:56:14.459753Z", "creators": [ "Organization: Anchore, Inc", "Tool: syft-[not provided]" @@ -11,10 +11,10 @@ "licenseListVersion": "3.15" }, "dataLicense": "CC0-1.0", - "documentNamespace": "https://anchore.com/syft/dir/some/path-f4586501-2da6-4541-a8e9-232b32f25e9a", + "documentNamespace": "https://anchore.com/syft/dir/some/path-7ed51d00-2c50-4c6d-aedc-271ed41009cb", "packages": [ { - "SPDXID": "SPDXRef-2a115ac97d018a0e", + "SPDXID": "SPDXRef-96e6e51fe8ba6d8b", "name": "package-1", "licenseConcluded": "MIT", "downloadLocation": "NOASSERTION", @@ -36,7 +36,7 @@ "versionInfo": "1.0.1" }, { - "SPDXID": "SPDXRef-5e920b2bece2c3ae", + "SPDXID": "SPDXRef-ad3d1c4abd84bf75", "name": "package-2", "licenseConcluded": "NONE", "downloadLocation": "NOASSERTION", diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden index 470366466..ce2afcc71 100644 --- a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden @@ -3,7 +3,7 @@ "name": "user-image-input", "spdxVersion": "SPDX-2.2", "creationInfo": { - "created": "2021-12-01T15:08:29.476498Z", + "created": "2021-12-15T23:56:14.468453Z", "creators": [ "Organization: Anchore, Inc", "Tool: syft-[not provided]" @@ -11,10 +11,10 @@ "licenseListVersion": "3.15" }, "dataLicense": "CC0-1.0", - "documentNamespace": "https://anchore.com/syft/image/user-image-input-e3b7637c-9b2f-4005-a683-58e60f979082", + "documentNamespace": "https://anchore.com/syft/image/user-image-input-f7c12e3a-8390-4f0d-a4a9-7d756e7e8d7d", "packages": [ { - "SPDXID": "SPDXRef-888661d4f0362f02", + "SPDXID": "SPDXRef-b8995af4e6171091", "name": "package-1", "licenseConcluded": "MIT", "downloadLocation": "NOASSERTION", @@ -36,7 +36,7 @@ "versionInfo": "1.0.1" }, { - "SPDXID": "SPDXRef-4068ff5e8926b305", + "SPDXID": "SPDXRef-73f796c846875b9e", "name": "package-2", "licenseConcluded": "NONE", "downloadLocation": "NOASSERTION", diff --git a/internal/formats/syftjson/encoder.go b/internal/formats/syftjson/encoder.go index ae52818f3..360d36940 100644 --- a/internal/formats/syftjson/encoder.go +++ b/internal/formats/syftjson/encoder.go @@ -8,7 +8,7 @@ import ( ) func encoder(output io.Writer, s sbom.SBOM) error { - doc := ToFormatModel(s) + doc := toFormatModel(s) enc := json.NewEncoder(output) // prevent > and < from being escaped in the payload diff --git a/internal/formats/syftjson/model/package.go b/internal/formats/syftjson/model/package.go index a1b967cef..cfc98d232 100644 --- a/internal/formats/syftjson/model/package.go +++ b/internal/formats/syftjson/model/package.go @@ -4,9 +4,10 @@ import ( "encoding/json" "fmt" + "github.com/anchore/syft/syft/source" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/source" ) // Package represents 
a pkg.Package object specialized for JSON marshaling and unmarshalling. diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden index ec910d63f..d00832f45 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestDirectoryPresenter.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "2a115ac97d018a0e", + "id": "96e6e51fe8ba6d8b", "name": "package-1", "version": "1.0.1", "type": "python", @@ -36,7 +36,7 @@ } }, { - "id": "5e920b2bece2c3ae", + "id": "ad3d1c4abd84bf75", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden index 76e131341..131d3bed4 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "962403cfb7be50d7", + "id": "2a5c2dadd6f80c07", "name": "package-1", "version": "1.0.1", "type": "python", @@ -31,7 +31,7 @@ } }, { - "id": "b11f44847bba0ed1", + "id": "ac462e450060da2c", "name": "package-2", "version": "2.0.1", "type": "deb", @@ -62,8 +62,8 @@ ], "artifactRelationships": [ { - "parent": "962403cfb7be50d7", - "child": "b11f44847bba0ed1", + "parent": "", + "child": "", "type": "ownership-by-file-overlap", "metadata": { "file": "path" diff --git a/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden b/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden index 9431176ed..724299861 100644 --- a/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden +++ b/internal/formats/syftjson/test-fixtures/snapshot/TestImagePresenter.golden @@ -1,7 +1,7 @@ { "artifacts": [ { - "id": "888661d4f0362f02", + "id": "b8995af4e6171091", "name": "package-1", "version": "1.0.1", "type": "python", @@ -32,7 +32,7 @@ } }, { - "id": "4068ff5e8926b305", + "id": "73f796c846875b9e", "name": "package-2", "version": "2.0.1", "type": "deb", diff --git a/internal/formats/syftjson/to_format_model.go b/internal/formats/syftjson/to_format_model.go index 8417d8a12..dbbcf5fb2 100644 --- a/internal/formats/syftjson/to_format_model.go +++ b/internal/formats/syftjson/to_format_model.go @@ -19,8 +19,7 @@ import ( "github.com/anchore/syft/syft/source" ) -// TODO: this is exported for the use of the power-user command (temp) -func ToFormatModel(s sbom.SBOM) model.Document { +func toFormatModel(s sbom.SBOM) model.Document { src, err := toSourceModel(s.Source) if err != nil { log.Warnf("unable to create syft-json source object: %+v", err) diff --git a/syft/artifact/id.go b/syft/artifact/id.go index 50498467c..fc624a7eb 100644 --- a/syft/artifact/id.go +++ b/syft/artifact/id.go @@ -13,7 +13,7 @@ type Identifiable interface { ID() ID } -func IDFromHash(obj interface{}) (ID, error) { +func IDByHash(obj interface{}) (ID, error) { f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{ ZeroNil: true, SlicesAsSets: true, diff --git a/syft/pkg/cargo_package_metadata.go b/syft/pkg/cargo_package_metadata.go index c56de29eb..8ab6da20e 100644 --- a/syft/pkg/cargo_package_metadata.go +++ b/syft/pkg/cargo_package_metadata.go @@ -9,8 +9,8 @@ type CargoPackageMetadata struct { } // Pkg returns 
the standard `pkg.Package` representation of the package referenced within the Cargo.lock metadata. -func (p CargoPackageMetadata) Pkg() Package { - return Package{ +func (p CargoPackageMetadata) Pkg() *Package { + return &Package{ Name: p.Name, Version: p.Version, Language: Rust, diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index d0008990a..f2f267eb8 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -51,6 +51,7 @@ func (c *Catalog) Package(id artifact.ID) *Package { log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err) return nil } + p.id = v.id return &p } @@ -75,8 +76,12 @@ func (c *Catalog) Add(p Package) { c.lock.Lock() defer c.lock.Unlock() - // note: since we are capturing the ID, we cannot modify the package being added from this point forward id := p.ID() + if id == "" { + log.Warnf("found package with empty ID while adding to the catalog: %+v", p) + p.SetID() + id = p.ID() + } // store by package ID c.byID[id] = p @@ -142,7 +147,7 @@ func (c *Catalog) Sorted(types ...Type) (pkgs []Package) { sort.SliceStable(pkgs, func(i, j int) bool { if pkgs[i].Name == pkgs[j].Name { if pkgs[i].Version == pkgs[j].Version { - if pkgs[i].Type == pkgs[j].Type { + if pkgs[i].Type == pkgs[j].Type && len(pkgs[i].Locations) > 0 && len(pkgs[j].Locations) > 0 { return pkgs[i].Locations[0].String() < pkgs[j].Locations[0].String() } return pkgs[i].Type < pkgs[j].Type diff --git a/syft/pkg/catalog_test.go b/syft/pkg/catalog_test.go index 5700bf813..005714c0c 100644 --- a/syft/pkg/catalog_test.go +++ b/syft/pkg/catalog_test.go @@ -10,23 +10,6 @@ import ( "github.com/anchore/syft/syft/source" ) -var catalogAddAndRemoveTestPkgs = []Package{ - { - Locations: []source.Location{ - source.NewVirtualLocation("/a/path", "/another/path"), - source.NewVirtualLocation("/b/path", "/bee/path"), - }, - Type: RpmPkg, - }, - { - Locations: []source.Location{ - source.NewVirtualLocation("/c/path", "/another/path"), - source.NewVirtualLocation("/d/path", "/another/path"), - }, - Type: NpmPkg, - }, -} - type expectedIndexes struct { byType map[Type]*strset.Set byPath map[string]*strset.Set @@ -34,18 +17,38 @@ type expectedIndexes struct { func TestCatalogAddPopulatesIndex(t *testing.T) { + var pkgs = []Package{ + { + Locations: []source.Location{ + source.NewVirtualLocation("/a/path", "/another/path"), + source.NewVirtualLocation("/b/path", "/bee/path"), + }, + Type: RpmPkg, + }, + { + Locations: []source.Location{ + source.NewVirtualLocation("/c/path", "/another/path"), + source.NewVirtualLocation("/d/path", "/another/path"), + }, + Type: NpmPkg, + }, + } + + for i := range pkgs { + p := &pkgs[i] + p.SetID() + } + fixtureID := func(i int) string { - return string(catalogAddAndRemoveTestPkgs[i].ID()) + return string(pkgs[i].ID()) } tests := []struct { name string - pkgs []Package expectedIndexes expectedIndexes }{ { name: "vanilla-add", - pkgs: catalogAddAndRemoveTestPkgs, expectedIndexes: expectedIndexes{ byType: map[Type]*strset.Set{ RpmPkg: strset.New(fixtureID(0)), @@ -65,7 +68,7 @@ func TestCatalogAddPopulatesIndex(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - c := NewCatalog(test.pkgs...) + c := NewCatalog(pkgs...) 
assertIndexes(t, c, test.expectedIndexes) @@ -75,9 +78,7 @@ func TestCatalogAddPopulatesIndex(t *testing.T) { func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { // assert path index - if len(c.idsByPath) != len(expectedIndexes.byPath) { - t.Errorf("unexpected path index length: %d != %d", len(c.idsByPath), len(expectedIndexes.byPath)) - } + assert.Len(t, c.idsByPath, len(expectedIndexes.byPath), "unexpected path index length") for path, expectedIds := range expectedIndexes.byPath { actualIds := strset.New() for _, p := range c.PackagesByPath(path) { @@ -90,9 +91,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { } // assert type index - if len(c.idsByType) != len(expectedIndexes.byType) { - t.Errorf("unexpected type index length: %d != %d", len(c.idsByType), len(expectedIndexes.byType)) - } + assert.Len(t, c.idsByType, len(expectedIndexes.byType), "unexpected type index length") for ty, expectedIds := range expectedIndexes.byType { actualIds := strset.New() for p := range c.Enumerate(ty) { diff --git a/syft/pkg/cataloger/apkdb/parse_apk_db.go b/syft/pkg/cataloger/apkdb/parse_apk_db.go index 835822f5d..26d359b70 100644 --- a/syft/pkg/cataloger/apkdb/parse_apk_db.go +++ b/syft/pkg/cataloger/apkdb/parse_apk_db.go @@ -21,14 +21,25 @@ import ( // integrity check var _ common.ParserFn = parseApkDB +func newApkDBPackage(d *pkg.ApkMetadata) *pkg.Package { + return &pkg.Package{ + Name: d.Package, + Version: d.Version, + Licenses: strings.Split(d.License, " "), + Type: pkg.ApkPkg, + MetadataType: pkg.ApkMetadataType, + Metadata: *d, + } +} + // parseApkDb parses individual packages from a given Alpine DB file. For more information on specific fields // see https://wiki.alpinelinux.org/wiki/Apk_spec . -func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parseApkDB(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { // larger capacity for the scanner. 
const maxScannerCapacity = 1024 * 1024 // a new larger buffer for the scanner bufScan := make([]byte, maxScannerCapacity) - packages := make([]pkg.Package, 0) + packages := make([]*pkg.Package, 0) scanner := bufio.NewScanner(reader) scanner.Buffer(bufScan, maxScannerCapacity) @@ -52,14 +63,7 @@ func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relations return nil, nil, err } if metadata != nil { - packages = append(packages, pkg.Package{ - Name: metadata.Package, - Version: metadata.Version, - Licenses: strings.Split(metadata.License, " "), - Type: pkg.ApkPkg, - MetadataType: pkg.ApkMetadataType, - Metadata: *metadata, - }) + packages = append(packages, newApkDBPackage(metadata)) } } diff --git a/syft/pkg/cataloger/apkdb/parse_apk_db_test.go b/syft/pkg/cataloger/apkdb/parse_apk_db_test.go index 13926274e..958c1f870 100644 --- a/syft/pkg/cataloger/apkdb/parse_apk_db_test.go +++ b/syft/pkg/cataloger/apkdb/parse_apk_db_test.go @@ -647,11 +647,11 @@ func TestSinglePackageDetails(t *testing.T) { func TestMultiplePackages(t *testing.T) { tests := []struct { fixture string - expected []pkg.Package + expected []*pkg.Package }{ { fixture: "test-fixtures/multiple", - expected: []pkg.Package{ + expected: []*pkg.Package{ { Name: "libc-utils", Version: "0.7.2-r0", diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index 5093fa965..568b08766 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -49,10 +49,10 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers // perform analysis, accumulating errors for each failed analysis var errs error - for _, theCataloger := range catalogers { + for _, c := range catalogers { // find packages from the underlying raw data - log.Debugf("cataloging with %q", theCataloger.Name()) - packages, relationships, err := theCataloger.Catalog(resolver) + log.Debugf("cataloging with %q", c.Name()) + packages, relationships, err := c.Catalog(resolver) if err != nil { errs = multierror.Append(errs, err) continue @@ -64,10 +64,10 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers packagesDiscovered.N += int64(catalogedPackages) for _, p := range packages { - // generate CPEs + // generate CPEs (note: this is excluded from package ID, so is safe to mutate) p.CPEs = cpe.Generate(p) - // generate PURL + // generate PURL (note: this is excluded from package ID, so is safe to mutate) p.PURL = generatePackageURL(p, theDistro) // create file-to-package relationships for files owned by the package diff --git a/syft/pkg/cataloger/common/generic_cataloger.go b/syft/pkg/cataloger/common/generic_cataloger.go index 9035bda7a..7d2e3d477 100644 --- a/syft/pkg/cataloger/common/generic_cataloger.go +++ b/syft/pkg/cataloger/common/generic_cataloger.go @@ -45,22 +45,23 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, contentReader, err := resolver.FileContentsByLocation(location) if err != nil { // TODO: fail or log? - return nil, nil, fmt.Errorf("unable to fetch contents for location=%v : %w", location, err) + return nil, nil, fmt.Errorf("unable to fetch contents at location=%v: %w", location, err) } discoveredPackages, discoveredRelationships, err := parser(location.RealPath, contentReader) internal.CloseAndLogError(contentReader, location.VirtualPath) if err != nil { // TODO: should we fail? or only log? 
- log.Warnf("cataloger '%s' failed to parse entries (location=%+v): %+v", c.upstreamCataloger, location, err) + log.Warnf("cataloger '%s' failed to parse entries at location=%+v: %+v", c.upstreamCataloger, location, err) continue } for _, p := range discoveredPackages { p.FoundBy = c.upstreamCataloger p.Locations = append(p.Locations, location) + p.SetID() - packages = append(packages, p) + packages = append(packages, *p) } relationships = append(relationships, discoveredRelationships...) diff --git a/syft/pkg/cataloger/common/generic_cataloger_test.go b/syft/pkg/cataloger/common/generic_cataloger_test.go index 36a0cb61a..68dfe6fd1 100644 --- a/syft/pkg/cataloger/common/generic_cataloger_test.go +++ b/syft/pkg/cataloger/common/generic_cataloger_test.go @@ -13,12 +13,12 @@ import ( "github.com/anchore/syft/syft/source" ) -func parser(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parser(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { contents, err := ioutil.ReadAll(reader) if err != nil { panic(err) } - return []pkg.Package{ + return []*pkg.Package{ { Name: string(contents), }, diff --git a/syft/pkg/cataloger/common/parser.go b/syft/pkg/cataloger/common/parser.go index b2094f276..b79834dbd 100644 --- a/syft/pkg/cataloger/common/parser.go +++ b/syft/pkg/cataloger/common/parser.go @@ -8,4 +8,4 @@ import ( ) // ParserFn standardizes a function signature for parser functions that accept the virtual file path (not usable for file reads) and contents and return any discovered packages from that file -type ParserFn func(string, io.Reader) ([]pkg.Package, []artifact.Relationship, error) +type ParserFn func(string, io.Reader) ([]*pkg.Package, []artifact.Relationship, error) diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index 418533834..ea431a11b 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -68,6 +68,8 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti // fetch additional data from the copyright file to derive the license information addLicenses(resolver, dbLocation, p) + + p.SetID() } allPackages = append(allPackages, pkgs...) diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status.go b/syft/pkg/cataloger/deb/parse_dpkg_status.go index a5338640f..c2f84c115 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_status.go @@ -20,6 +20,16 @@ var ( sourceRegexp = regexp.MustCompile(`(?P\S+)( \((?P.*)\))?`) ) +func newDpkgPackage(d pkg.DpkgMetadata) pkg.Package { + return pkg.Package{ + Name: d.Package, + Version: d.Version, + Type: pkg.DebPkg, + MetadataType: pkg.DpkgMetadataType, + Metadata: d, + } +} + // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 
func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { buffedReader := bufio.NewReader(reader) @@ -37,13 +47,7 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { } if entry.Package != "" { - packages = append(packages, pkg.Package{ - Name: entry.Package, - Version: entry.Version, - Type: pkg.DebPkg, - MetadataType: pkg.DpkgMetadataType, - Metadata: entry, - }) + packages = append(packages, newDpkgPackage(entry)) } } diff --git a/syft/pkg/cataloger/golang/parse_go_bin.go b/syft/pkg/cataloger/golang/parse_go_bin.go index 8992efce6..bcb33abda 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin.go +++ b/syft/pkg/cataloger/golang/parse_go_bin.go @@ -17,6 +17,28 @@ const ( type exeOpener func(file io.ReadCloser) ([]exe, error) +func newGoBinaryPackage(name, version, h1Digest, goVersion, architecture string, location source.Location) pkg.Package { + p := pkg.Package{ + Name: name, + Version: version, + Language: pkg.Go, + Type: pkg.GoModulePkg, + Locations: []source.Location{ + location, + }, + MetadataType: pkg.GolangBinMetadataType, + Metadata: pkg.GolangBinMetadata{ + GoCompiledVersion: goVersion, + H1Digest: h1Digest, + Architecture: architecture, + }, + } + + p.SetID() + + return p +} + func parseGoBin(location source.Location, reader io.ReadCloser, opener exeOpener) (pkgs []pkg.Package, err error) { var exes []exe // it has been found that there are stdlib paths within openExe that can panic. We want to prevent this behavior @@ -54,21 +76,10 @@ func buildGoPkgInfo(location source.Location, mod, goVersion, arch string) []pkg } if fields[0] == packageIdentifier || fields[0] == replaceIdentifier { - pkgsSlice = append(pkgsSlice, pkg.Package{ - Name: fields[1], - Version: fields[2], - Language: pkg.Go, - Type: pkg.GoModulePkg, - Locations: []source.Location{ - location, - }, - MetadataType: pkg.GolangBinMetadataType, - Metadata: pkg.GolangBinMetadata{ - GoCompiledVersion: goVersion, - H1Digest: fields[3], - Architecture: arch, - }, - }) + name := fields[1] + version := fields[2] + h1Digest := fields[3] + pkgsSlice = append(pkgsSlice, newGoBinaryPackage(name, version, h1Digest, goVersion, arch, location)) } } diff --git a/syft/pkg/cataloger/golang/parse_go_bin_test.go b/syft/pkg/cataloger/golang/parse_go_bin_test.go index b1d645036..7bd53b35c 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin_test.go +++ b/syft/pkg/cataloger/golang/parse_go_bin_test.go @@ -147,6 +147,10 @@ func TestBuildGoPkgInfo(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { + for i := range test.expected { + p := &test.expected[i] + p.SetID() + } location := source.Location{ Coordinates: source.Coordinates{ RealPath: "/a-path", diff --git a/syft/pkg/cataloger/golang/parse_go_mod.go b/syft/pkg/cataloger/golang/parse_go_mod.go index c7cc49e5a..ab2845e94 100644 --- a/syft/pkg/cataloger/golang/parse_go_mod.go +++ b/syft/pkg/cataloger/golang/parse_go_mod.go @@ -12,8 +12,8 @@ import ( ) // parseGoMod takes a go.mod and lists all packages discovered. 
-func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make(map[string]pkg.Package) +func parseGoMod(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make(map[string]*pkg.Package) contents, err := ioutil.ReadAll(reader) if err != nil { @@ -26,7 +26,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati } for _, m := range file.Require { - packages[m.Mod.Path] = pkg.Package{ + packages[m.Mod.Path] = &pkg.Package{ Name: m.Mod.Path, Version: m.Mod.Version, Language: pkg.Go, @@ -36,7 +36,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati // remove any old packages and replace with new ones... for _, m := range file.Replace { - packages[m.New.Path] = pkg.Package{ + packages[m.New.Path] = &pkg.Package{ Name: m.New.Path, Version: m.New.Version, Language: pkg.Go, @@ -49,7 +49,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati delete(packages, m.Mod.Path) } - pkgsSlice := make([]pkg.Package, len(packages)) + pkgsSlice := make([]*pkg.Package, len(packages)) idx := 0 for _, p := range packages { pkgsSlice[idx] = p diff --git a/syft/pkg/cataloger/golang/parse_go_mod_test.go b/syft/pkg/cataloger/golang/parse_go_mod_test.go index 54d5b5465..f685024b9 100644 --- a/syft/pkg/cataloger/golang/parse_go_mod_test.go +++ b/syft/pkg/cataloger/golang/parse_go_mod_test.go @@ -87,7 +87,7 @@ func TestParseGoMod(t *testing.T) { continue } - diffs := deep.Equal(a, e) + diffs := deep.Equal(a, &e) if len(diffs) > 0 { t.Errorf("diffs found for %q", a.Name) for _, d := range diffs { diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 7e188dce9..14067b593 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -35,7 +35,7 @@ type archiveParser struct { } // parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. -func parseJavaArchive(virtualPath string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parseJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { parser, cleanupFn, err := newJavaArchiveParser(virtualPath, reader, true) // note: even on error, we should always run cleanup functions defer cleanupFn() @@ -81,8 +81,8 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo } // parse the loaded archive and return all packages found. -func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error) { - var pkgs []pkg.Package +func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error) { + var pkgs []*pkg.Package var relationships []artifact.Relationship // find the parent package from the java manifest @@ -110,7 +110,7 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error) // lastly, add the parent package to the list (assuming the parent exists) if parentPkg != nil { - pkgs = append([]pkg.Package{*parentPkg}, pkgs...) + pkgs = append([]*pkg.Package{parentPkg}, pkgs...) } return pkgs, relationships, nil @@ -158,12 +158,12 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { // parent package, returning all listed Java packages found for each pom // properties discovered and potentially updating the given parentPkg with new // data. 
-func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([]pkg.Package, error) { +func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([]*pkg.Package, error) { if parentPkg == nil { return nil, nil } - var pkgs []pkg.Package + var pkgs []*pkg.Package properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath) if err != nil { @@ -183,7 +183,7 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([ pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.virtualPath) if pkgFromPom != nil { - pkgs = append(pkgs, *pkgFromPom) + pkgs = append(pkgs, pkgFromPom) } } @@ -192,8 +192,8 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([ // discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and // associating each discovered package to the given parent package. -func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { - var pkgs []pkg.Package +func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) { + var pkgs []*pkg.Package var relationships []artifact.Relationship // search and parse pom.properties files & fetch the contents diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 64e51b8e9..68296ec28 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -257,7 +257,7 @@ func TestParseJar(t *testing.T) { var parent *pkg.Package for _, a := range actual { if strings.Contains(a.Name, "example-") { - parent = &a + parent = a } } @@ -292,7 +292,7 @@ func TestParseJar(t *testing.T) { // write censored data back a.Metadata = metadata - diffs := deep.Equal(e, a) + diffs := deep.Equal(&e, a) if len(diffs) > 0 { t.Errorf("diffs found for %q", a.Name) for _, d := range diffs { @@ -527,7 +527,7 @@ func TestParseNestedJar(t *testing.T) { actualNameVersionPairSet := internal.NewStringSet() for _, a := range actual { - key := makeKey(&a) + key := makeKey(a) actualNameVersionPairSet.Add(key) if !expectedNameVersionPairSet.Contains(key) { t.Errorf("extra package: %s", a) @@ -545,7 +545,7 @@ func TestParseNestedJar(t *testing.T) { } for _, a := range actual { - actualKey := makeKey(&a) + actualKey := makeKey(a) metadata := a.Metadata.(pkg.JavaMetadata) if actualKey == "spring-boot|0.0.1-SNAPSHOT" { diff --git a/syft/pkg/cataloger/javascript/parse_package_json.go b/syft/pkg/cataloger/javascript/parse_package_json.go index eb3fdd3dc..9b0486d2c 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json.go +++ b/syft/pkg/cataloger/javascript/parse_package_json.go @@ -50,7 +50,6 @@ type Repository struct { // ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me" var authorPattern = regexp.MustCompile(`^\s*(?P[^<(]*)(\s+<(?P.*)>)?(\s\((?P.*)\))?\s*$`) -// Exports Author.UnmarshalJSON interface to help normalize the json structure. 
func (a *Author) UnmarshalJSON(b []byte) error { var authorStr string var fields map[string]string @@ -135,7 +134,7 @@ func licenseFromJSON(b []byte) (string, error) { return "", errors.New("unable to unmarshal license field as either string or object") } -func licensesFromJSON(p PackageJSON) ([]string, error) { +func (p PackageJSON) licensesFromJSON() ([]string, error) { if p.License == nil && p.Licenses == nil { // This package.json doesn't specify any licenses whatsoever return []string{}, nil @@ -163,8 +162,8 @@ func licensesFromJSON(p PackageJSON) ([]string, error) { } // parsePackageJSON parses a package.json and returns the discovered JavaScript packages. -func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - var packages []pkg.Package +func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + var packages []*pkg.Package dec := json.NewDecoder(reader) for { @@ -180,30 +179,34 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Rel return nil, nil, nil } - licenses, err := licensesFromJSON(p) - if err != nil { - return nil, nil, fmt.Errorf("failed to parse package.json file: %w", err) - } - - packages = append(packages, pkg.Package{ - Name: p.Name, - Version: p.Version, - Licenses: licenses, - Language: pkg.JavaScript, - Type: pkg.NpmPkg, - MetadataType: pkg.NpmPackageJSONMetadataType, - Metadata: pkg.NpmPackageJSONMetadata{ - Author: p.Author.AuthorString(), - Homepage: p.Homepage, - URL: p.Repository.URL, - Licenses: licenses, - }, - }) + packages = append(packages, newPackageJSONPackage(p)) } return packages, nil, nil } +func newPackageJSONPackage(p PackageJSON) *pkg.Package { + licenses, err := p.licensesFromJSON() + if err != nil { + log.Warnf("unable to extract licenses from javascript package.json: %+v", err) + } + + return &pkg.Package{ + Name: p.Name, + Version: p.Version, + Licenses: licenses, + Language: pkg.JavaScript, + Type: pkg.NpmPkg, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ + Author: p.Author.AuthorString(), + Homepage: p.Homepage, + URL: p.Repository.URL, + Licenses: licenses, + }, + } +} + func (p PackageJSON) hasNameAndVersionValues() bool { return p.Name != "" && p.Version != "" } diff --git a/syft/pkg/cataloger/javascript/parse_package_json_test.go b/syft/pkg/cataloger/javascript/parse_package_json_test.go index c162e97ed..99861a111 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json_test.go +++ b/syft/pkg/cataloger/javascript/parse_package_json_test.go @@ -136,7 +136,7 @@ func TestParsePackageJSON(t *testing.T) { t.Fatalf("unexpected package count: %d!=1", len(actual)) } - for _, d := range deep.Equal(actual[0], test.ExpectedPkg) { + for _, d := range deep.Equal(actual[0], &test.ExpectedPkg) { t.Errorf("diff: %+v", d) } diff --git a/syft/pkg/cataloger/javascript/parse_package_lock.go b/syft/pkg/cataloger/javascript/parse_package_lock.go index 880674963..30c2897b0 100644 --- a/syft/pkg/cataloger/javascript/parse_package_lock.go +++ b/syft/pkg/cataloger/javascript/parse_package_lock.go @@ -29,14 +29,14 @@ type Dependency struct { } // parsePackageLock parses a package-lock.json and returns the discovered JavaScript packages. 
-func parsePackageLock(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parsePackageLock(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { // in the case we find package-lock.json files in the node_modules directories, skip those // as the whole purpose of the lock file is for the specific dependencies of the root project if pathContainsNodeModulesDirectory(path) { return nil, nil, nil } - var packages []pkg.Package + var packages []*pkg.Package dec := json.NewDecoder(reader) for { @@ -47,7 +47,7 @@ func parsePackageLock(path string, reader io.Reader) ([]pkg.Package, []artifact. return nil, nil, fmt.Errorf("failed to parse package-lock.json file: %w", err) } for name, pkgMeta := range lock.Dependencies { - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: name, Version: pkgMeta.Version, Language: pkg.JavaScript, diff --git a/syft/pkg/cataloger/javascript/parse_package_lock_test.go b/syft/pkg/cataloger/javascript/parse_package_lock_test.go index 0f33aaa83..ab88e2f16 100644 --- a/syft/pkg/cataloger/javascript/parse_package_lock_test.go +++ b/syft/pkg/cataloger/javascript/parse_package_lock_test.go @@ -4,10 +4,12 @@ import ( "os" "testing" + "github.com/stretchr/testify/assert" + "github.com/anchore/syft/syft/pkg" ) -func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { +func assertPkgsEqual(t *testing.T, actual []*pkg.Package, expected map[string]pkg.Package) { t.Helper() if len(actual) != len(expected) { for _, a := range actual { @@ -18,26 +20,11 @@ func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg for _, a := range actual { expectedPkg, ok := expected[a.Name] - if !ok { - t.Errorf("unexpected package found: '%s'", a.Name) - } - - if expectedPkg.Version != a.Version { - t.Errorf("%s : unexpected package version: '%s', expected: '%s'", a.Name, a.Version, expectedPkg.Version) - } - - if a.Language != expectedPkg.Language { - t.Errorf("%s : bad language: '%+v', expected: '%+v'", a.Name, a.Language, expectedPkg.Language) - } - - if a.Type != expectedPkg.Type { - t.Errorf("%s : bad package type: %+v, expected: %+v", a.Name, a.Type, expectedPkg.Type) - } - - if len(a.Licenses) < len(expectedPkg.Licenses) { - t.Errorf("%s : bad package licenses count: '%+v'", a.Name, a.Licenses) - } - + assert.True(t, ok) + assert.Equal(t, expectedPkg.Version, a.Version, "bad version") + assert.Equal(t, expectedPkg.Language, a.Language, "bad language") + assert.Equal(t, expectedPkg.Type, a.Type, "bad type") + assert.Equal(t, expectedPkg.Licenses, a.Licenses, "bad license count") } } diff --git a/syft/pkg/cataloger/javascript/parse_yarn_lock.go b/syft/pkg/cataloger/javascript/parse_yarn_lock.go index 71e9dd3da..7fad405c9 100644 --- a/syft/pkg/cataloger/javascript/parse_yarn_lock.go +++ b/syft/pkg/cataloger/javascript/parse_yarn_lock.go @@ -35,14 +35,14 @@ const ( noVersion = "" ) -func parseYarnLock(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parseYarnLock(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { // in the case we find yarn.lock files in the node_modules directories, skip those // as the whole purpose of the lock file is for the specific dependencies of the project if pathContainsNodeModulesDirectory(path) { return nil, nil, nil } - var packages []pkg.Package + var packages []*pkg.Package scanner := bufio.NewScanner(reader) parsedPackages := 
internal.NewStringSet() currentPackage := noPackage @@ -106,8 +106,8 @@ func findPackageVersion(line string) string { return noVersion } -func newYarnLockPackage(name, version string) pkg.Package { - return pkg.Package{ +func newYarnLockPackage(name, version string) *pkg.Package { + return &pkg.Package{ Name: name, Version: version, Language: pkg.JavaScript, diff --git a/syft/pkg/cataloger/php/parse_composer_lock.go b/syft/pkg/cataloger/php/parse_composer_lock.go index b92f4fa64..21052d9d9 100644 --- a/syft/pkg/cataloger/php/parse_composer_lock.go +++ b/syft/pkg/cataloger/php/parse_composer_lock.go @@ -8,7 +8,6 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common" ) type ComposerLock struct { @@ -21,12 +20,9 @@ type Dependency struct { Version string `json:"version"` } -// integrity check -var _ common.ParserFn = parseComposerLock - // parseComposerLock is a parser function for Composer.lock contents, returning "Default" php packages discovered. -func parseComposerLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make([]pkg.Package, 0) +func parseComposerLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make([]*pkg.Package, 0) dec := json.NewDecoder(reader) for { @@ -39,7 +35,7 @@ func parseComposerLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Re for _, pkgMeta := range lock.Packages { version := pkgMeta.Version name := pkgMeta.Name - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: name, Version: version, Language: pkg.PHP, diff --git a/syft/pkg/cataloger/php/parse_composer_lock_test.go b/syft/pkg/cataloger/php/parse_composer_lock_test.go index 16b97fc89..6e14dcd7f 100644 --- a/syft/pkg/cataloger/php/parse_composer_lock_test.go +++ b/syft/pkg/cataloger/php/parse_composer_lock_test.go @@ -9,7 +9,7 @@ import ( ) func TestParseComposerFileLock(t *testing.T) { - expected := []pkg.Package{ + expected := []*pkg.Package{ { Name: "adoy/fastcgi-client", Version: "1.0.2", diff --git a/syft/pkg/cataloger/php/parse_installed_json.go b/syft/pkg/cataloger/php/parse_installed_json.go index 63272d3f2..178dcfa2b 100644 --- a/syft/pkg/cataloger/php/parse_installed_json.go +++ b/syft/pkg/cataloger/php/parse_installed_json.go @@ -40,8 +40,8 @@ func (w *installedJSONComposerV2) UnmarshalJSON(data []byte) error { var _ common.ParserFn = parseComposerLock // parseComposerLock is a parser function for Composer.lock contents, returning "Default" php packages discovered. 
-func parseInstalledJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make([]pkg.Package, 0) +func parseInstalledJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make([]*pkg.Package, 0) dec := json.NewDecoder(reader) for { @@ -54,7 +54,7 @@ func parseInstalledJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.R for _, pkgMeta := range lock.Packages { version := pkgMeta.Version name := pkgMeta.Name - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: name, Version: version, Language: pkg.PHP, diff --git a/syft/pkg/cataloger/php/parse_installed_json_test.go b/syft/pkg/cataloger/php/parse_installed_json_test.go index bc9221548..d553a51f4 100644 --- a/syft/pkg/cataloger/php/parse_installed_json_test.go +++ b/syft/pkg/cataloger/php/parse_installed_json_test.go @@ -9,7 +9,7 @@ import ( ) func TestParseInstalledJsonComposerV1(t *testing.T) { - expected := []pkg.Package{ + expected := []*pkg.Package{ { Name: "asm89/stack-cors", Version: "1.3.0", @@ -41,7 +41,7 @@ func TestParseInstalledJsonComposerV1(t *testing.T) { } func TestParseInstalledJsonComposerV2(t *testing.T) { - expected := []pkg.Package{ + expected := []*pkg.Package{ { Name: "asm89/stack-cors", Version: "1.3.0", diff --git a/syft/pkg/cataloger/python/package_cataloger.go b/syft/pkg/cataloger/python/package_cataloger.go index d4749c874..f8a3a1228 100644 --- a/syft/pkg/cataloger/python/package_cataloger.go +++ b/syft/pkg/cataloger/python/package_cataloger.go @@ -74,7 +74,7 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad licenses = []string{metadata.License} } - return &pkg.Package{ + p := &pkg.Package{ Name: metadata.Name, Version: metadata.Version, FoundBy: c.Name(), @@ -84,7 +84,11 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad Type: pkg.PythonPkg, MetadataType: pkg.PythonPackageMetadataType, Metadata: *metadata, - }, nil + } + + p.SetID() + + return p, nil } // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock.go b/syft/pkg/cataloger/python/parse_pipfile_lock.go index 3d34bbd0f..7ea963f47 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock.go @@ -38,8 +38,8 @@ type Dependency struct { var _ common.ParserFn = parsePipfileLock // parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered. 
-func parsePipfileLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make([]pkg.Package, 0) +func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make([]*pkg.Package, 0) dec := json.NewDecoder(reader) for { @@ -51,7 +51,7 @@ func parsePipfileLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Rel } for name, pkgMeta := range lock.Default { version := strings.TrimPrefix(pkgMeta.Version, "==") - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: name, Version: version, Language: pkg.Python, diff --git a/syft/pkg/cataloger/python/parse_poetry_lock.go b/syft/pkg/cataloger/python/parse_poetry_lock.go index 70ab2f471..8cae5ed9e 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock.go @@ -14,7 +14,7 @@ import ( var _ common.ParserFn = parsePoetryLock // parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered. -func parsePoetryLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parsePoetryLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { tree, err := toml.LoadReader(reader) if err != nil { return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %v", err) diff --git a/syft/pkg/cataloger/python/parse_poetry_lock_test.go b/syft/pkg/cataloger/python/parse_poetry_lock_test.go index 5f30c213f..a582a8344 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock_test.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock_test.go @@ -9,7 +9,7 @@ import ( ) func TestParsePoetryLock(t *testing.T) { - expected := []pkg.Package{ + expected := []*pkg.Package{ { Name: "added-value", Version: "0.14.2", diff --git a/syft/pkg/cataloger/python/parse_requirements.go b/syft/pkg/cataloger/python/parse_requirements.go index 9a7d403e6..e18810505 100644 --- a/syft/pkg/cataloger/python/parse_requirements.go +++ b/syft/pkg/cataloger/python/parse_requirements.go @@ -16,8 +16,8 @@ var _ common.ParserFn = parseRequirementsTxt // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // specific version. 
-func parseRequirementsTxt(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make([]pkg.Package, 0) +func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make([]*pkg.Package, 0) scanner := bufio.NewScanner(reader) for scanner.Scan() { @@ -44,7 +44,7 @@ func parseRequirementsTxt(_ string, reader io.Reader) ([]pkg.Package, []artifact parts := strings.Split(uncommented, "==") name := strings.TrimSpace(parts[0]) version := strings.TrimSpace(parts[1]) - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: name, Version: version, Language: pkg.Python, diff --git a/syft/pkg/cataloger/python/parse_requirements_test.go b/syft/pkg/cataloger/python/parse_requirements_test.go index f84913ac0..a79ff6393 100644 --- a/syft/pkg/cataloger/python/parse_requirements_test.go +++ b/syft/pkg/cataloger/python/parse_requirements_test.go @@ -4,12 +4,14 @@ import ( "os" "testing" + "github.com/stretchr/testify/assert" + "github.com/go-test/deep" "github.com/anchore/syft/syft/pkg" ) -func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { +func assertPackagesEqual(t *testing.T, actual []*pkg.Package, expected map[string]pkg.Package) { t.Helper() if len(actual) != len(expected) { for _, a := range actual { @@ -20,11 +22,9 @@ func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string for _, a := range actual { expectedPkg, ok := expected[a.Name] - if !ok { - t.Errorf("unexpected package found: '%s'", a.Name) - } + assert.True(t, ok) - for _, d := range deep.Equal(a, expectedPkg) { + for _, d := range deep.Equal(a, &expectedPkg) { t.Errorf("diff: %+v", d) } } diff --git a/syft/pkg/cataloger/python/parse_setup.go b/syft/pkg/cataloger/python/parse_setup.go index d21bbabba..5a762fcbf 100644 --- a/syft/pkg/cataloger/python/parse_setup.go +++ b/syft/pkg/cataloger/python/parse_setup.go @@ -20,8 +20,8 @@ var _ common.ParserFn = parseSetup // " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770) var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w\.]*)`) -func parseSetup(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - packages := make([]pkg.Package, 0) +func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + packages := make([]*pkg.Package, 0) scanner := bufio.NewScanner(reader) @@ -38,7 +38,7 @@ func parseSetup(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relations name = strings.TrimSpace(name) version := strings.TrimSpace(parts[len(parts)-1]) - packages = append(packages, pkg.Package{ + packages = append(packages, &pkg.Package{ Name: strings.Trim(name, "'\""), Version: strings.Trim(version, "'\""), Language: pkg.Python, diff --git a/syft/pkg/cataloger/python/poetry_metadata.go b/syft/pkg/cataloger/python/poetry_metadata.go index 8b15dd7e0..f75180566 100644 --- a/syft/pkg/cataloger/python/poetry_metadata.go +++ b/syft/pkg/cataloger/python/poetry_metadata.go @@ -7,8 +7,8 @@ type PoetryMetadata struct { } // Pkgs returns all of the packages referenced within the poetry.lock metadata. 
-func (m PoetryMetadata) Pkgs() []pkg.Package { - pkgs := make([]pkg.Package, 0) +func (m PoetryMetadata) Pkgs() []*pkg.Package { + pkgs := make([]*pkg.Package, 0) for _, p := range m.Packages { pkgs = append(pkgs, p.Pkg()) diff --git a/syft/pkg/cataloger/python/poetry_metadata_package.go b/syft/pkg/cataloger/python/poetry_metadata_package.go index 5e9e454b1..db75c19f4 100644 --- a/syft/pkg/cataloger/python/poetry_metadata_package.go +++ b/syft/pkg/cataloger/python/poetry_metadata_package.go @@ -11,8 +11,8 @@ type PoetryMetadataPackage struct { } // Pkg returns the standard `pkg.Package` representation of the package referenced within the poetry.lock metadata. -func (p PoetryMetadataPackage) Pkg() pkg.Package { - return pkg.Package{ +func (p PoetryMetadataPackage) Pkg() *pkg.Package { + return &pkg.Package{ Name: p.Name, Version: p.Version, Language: pkg.Python, diff --git a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go index 93220994b..74f694c09 100644 --- a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go +++ b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go @@ -70,6 +70,8 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re Metadata: metadata, } + p.SetID() + allPkgs = append(allPkgs, p) } diff --git a/syft/pkg/cataloger/ruby/catalogers.go b/syft/pkg/cataloger/ruby/catalogers.go index 6e7499fce..fe176bc6e 100644 --- a/syft/pkg/cataloger/ruby/catalogers.go +++ b/syft/pkg/cataloger/ruby/catalogers.go @@ -1,5 +1,5 @@ /* -Package bundler provides a concrete Cataloger implementation for Ruby Gemfile.lock bundler files. +Package ruby bundler provides a concrete Cataloger implementation for Ruby Gemfile.lock bundler files. */ package ruby diff --git a/syft/pkg/cataloger/ruby/parse_gemfile_lock.go b/syft/pkg/cataloger/ruby/parse_gemfile_lock.go index 2d2f62c42..6cf96676d 100644 --- a/syft/pkg/cataloger/ruby/parse_gemfile_lock.go +++ b/syft/pkg/cataloger/ruby/parse_gemfile_lock.go @@ -17,8 +17,8 @@ var _ common.ParserFn = parseGemFileLockEntries var sectionsOfInterest = internal.NewStringSetFromSlice([]string{"GEM"}) // parseGemFileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered. 
-func parseGemFileLockEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - pkgs := make([]pkg.Package, 0) +func parseGemFileLockEntries(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + pkgs := make([]*pkg.Package, 0) scanner := bufio.NewScanner(reader) var currentSection string @@ -41,7 +41,7 @@ func parseGemFileLockEntries(_ string, reader io.Reader) ([]pkg.Package, []artif if len(candidate) != 2 { continue } - pkgs = append(pkgs, pkg.Package{ + pkgs = append(pkgs, &pkg.Package{ Name: candidate[0], Version: strings.Trim(candidate[1], "()"), Language: pkg.Ruby, diff --git a/syft/pkg/cataloger/ruby/parse_gemspec.go b/syft/pkg/cataloger/ruby/parse_gemspec.go index afab064e7..e175c417b 100644 --- a/syft/pkg/cataloger/ruby/parse_gemspec.go +++ b/syft/pkg/cataloger/ruby/parse_gemspec.go @@ -61,8 +61,8 @@ func processList(s string) []string { return results } -func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { - var pkgs []pkg.Package +func parseGemSpecEntries(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + var pkgs []*pkg.Package var fields = make(map[string]interface{}) scanner := bufio.NewScanner(reader) @@ -97,7 +97,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact. return nil, nil, fmt.Errorf("unable to decode gem metadata: %w", err) } - pkgs = append(pkgs, pkg.Package{ + pkgs = append(pkgs, &pkg.Package{ Name: metadata.Name, Version: metadata.Version, Licenses: metadata.Licenses, diff --git a/syft/pkg/cataloger/ruby/parse_gemspec_test.go b/syft/pkg/cataloger/ruby/parse_gemspec_test.go index d98cc4370..75e3fa24e 100644 --- a/syft/pkg/cataloger/ruby/parse_gemspec_test.go +++ b/syft/pkg/cataloger/ruby/parse_gemspec_test.go @@ -44,7 +44,7 @@ func TestParseGemspec(t *testing.T) { t.Fatalf("unexpected package count: %d!=1", len(actual)) } - for _, d := range deep.Equal(actual[0], expectedPkg) { + for _, d := range deep.Equal(actual[0], &expectedPkg) { t.Errorf("diff: %+v", d) } } diff --git a/syft/pkg/cataloger/rust/cargo_metadata.go b/syft/pkg/cataloger/rust/cargo_metadata.go index e1607ffc6..7b1563d83 100644 --- a/syft/pkg/cataloger/rust/cargo_metadata.go +++ b/syft/pkg/cataloger/rust/cargo_metadata.go @@ -7,8 +7,8 @@ type CargoMetadata struct { } // Pkgs returns all of the packages referenced within the Cargo.lock metadata. -func (m CargoMetadata) Pkgs() []pkg.Package { - pkgs := make([]pkg.Package, 0) +func (m CargoMetadata) Pkgs() []*pkg.Package { + pkgs := make([]*pkg.Package, 0) for _, p := range m.Packages { if p.Dependencies == nil { diff --git a/syft/pkg/cataloger/rust/parse_cargo_lock.go b/syft/pkg/cataloger/rust/parse_cargo_lock.go index ab5918978..87859e98c 100644 --- a/syft/pkg/cataloger/rust/parse_cargo_lock.go +++ b/syft/pkg/cataloger/rust/parse_cargo_lock.go @@ -14,7 +14,7 @@ import ( var _ common.ParserFn = parseCargoLock // parseCargoLock is a parser function for Cargo.lock contents, returning all rust cargo crates discovered. 
-func parseCargoLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { +func parseCargoLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { tree, err := toml.LoadReader(reader) if err != nil { return nil, nil, fmt.Errorf("unable to load Cargo.lock for parsing: %v", err) diff --git a/syft/pkg/cataloger/rust/parse_cargo_lock_test.go b/syft/pkg/cataloger/rust/parse_cargo_lock_test.go index c5c88ffa4..4bf2cd3e2 100644 --- a/syft/pkg/cataloger/rust/parse_cargo_lock_test.go +++ b/syft/pkg/cataloger/rust/parse_cargo_lock_test.go @@ -9,7 +9,7 @@ import ( ) func TestParseCargoLock(t *testing.T) { - expected := []pkg.Package{ + expected := []*pkg.Package{ { Name: "ansi_term", Version: "0.12.1", diff --git a/syft/pkg/package.go b/syft/pkg/package.go index d91b211cb..604432a86 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -14,6 +14,7 @@ import ( // Package represents an application or library that has been bundled into a distributable format. // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? type Package struct { + id artifact.ID `hash:"ignore"` Name string // the package name Version string // the version of the package FoundBy string // the specific cataloger that discovered this package @@ -21,24 +22,27 @@ type Package struct { Licenses []string // licenses discovered with the package metadata Language Language // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) Type Type // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) - CPEs []CPE // all possible Common Platform Enumerators - PURL string // the Package URL (see https://github.com/package-url/purl-spec) + CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) + PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) (note: this is NOT included in the definition of the ID since all fields on a pURL are derived from other fields) MetadataType MetadataType // the shape of the additional data in the "metadata" field Metadata interface{} // additional data found while parsing the package source } -func (p Package) ID() artifact.ID { - f, err := artifact.IDFromHash(p) +func (p *Package) SetID() { + id, err := artifact.IDByHash(p) if err != nil { // TODO: what to do in this case? log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err) - return "" + return } + p.id = id +} - return f +func (p Package) ID() artifact.ID { + return p.id } // Stringer to represent a package. func (p Package) String() string { - return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s)", p.Type, p.Name, p.Version) + return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id) } diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index 14483cc9a..228726a37 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -127,20 +127,20 @@ func TestFingerprint(t *testing.T) { expectIdentical: false, }, { - name: "CPEs is reflected", + name: "CPEs is ignored", transform: func(pkg Package) Package { pkg.CPEs = []CPE{} return pkg }, - expectIdentical: false, + expectIdentical: true, }, { - name: "pURL is reflected", + name: "pURL is ignored", transform: func(pkg Package) Package { pkg.PURL = "new!" 
return pkg }, - expectIdentical: false, + expectIdentical: true, }, { name: "language is reflected", @@ -190,7 +190,10 @@ func TestFingerprint(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { + originalPkg.SetID() transformedPkg := test.transform(originalPkg) + transformedPkg.SetID() + originalFingerprint := originalPkg.ID() assert.NotEmpty(t, originalFingerprint) transformedFingerprint := transformedPkg.ID() diff --git a/syft/pkg/relationships_by_file_ownership.go b/syft/pkg/relationships_by_file_ownership.go index 5c01c949d..62ba262c8 100644 --- a/syft/pkg/relationships_by_file_ownership.go +++ b/syft/pkg/relationships_by_file_ownership.go @@ -27,11 +27,11 @@ func RelationshipsByFileOwnership(catalog *Catalog) []artifact.Relationship { var relationships = findOwnershipByFilesRelationships(catalog) var edges []artifact.Relationship - for parent, children := range relationships { - for child, files := range children { + for parentID, children := range relationships { + for childID, files := range children { edges = append(edges, artifact.Relationship{ - From: catalog.byID[parent], - To: catalog.byID[child], + From: catalog.byID[parentID], + To: catalog.byID[childID], Type: artifact.OwnershipByFileOverlapRelationship, Data: ownershipByFilesMetadata{ Files: files.List(), diff --git a/syft/pkg/relationships_by_file_ownership_test.go b/syft/pkg/relationships_by_file_ownership_test.go index 7bae5f3e3..69867f270 100644 --- a/syft/pkg/relationships_by_file_ownership_test.go +++ b/syft/pkg/relationships_by_file_ownership_test.go @@ -32,6 +32,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, } + parent.SetID() child := Package{ Locations: []source.Location{ @@ -40,6 +41,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, Type: NpmPkg, } + child.SetID() relationship := artifact.Relationship{ From: parent, @@ -73,6 +75,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, } + parent.SetID() child := Package{ Locations: []source.Location{ @@ -81,6 +84,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, Type: NpmPkg, } + child.SetID() relationship := artifact.Relationship{ From: parent, @@ -114,6 +118,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, } + parent.SetID() + child := Package{ Locations: []source.Location{ source.NewVirtualLocation("/c/path", "/another/path"), @@ -122,6 +128,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) { Type: NpmPkg, } + child.SetID() + return []Package{parent, child}, nil }, }, diff --git a/syft/source/coordinates.go b/syft/source/coordinates.go index 049f418b7..b97199588 100644 --- a/syft/source/coordinates.go +++ b/syft/source/coordinates.go @@ -27,7 +27,7 @@ func NewCoordinateSet(start ...Coordinates) CoordinateSet { } func (c Coordinates) ID() artifact.ID { - f, err := artifact.IDFromHash(c) + f, err := artifact.IDByHash(c) if err != nil { // TODO: what to do in this case? 
log.Warnf("unable to get fingerprint of location coordinate=%+v: %+v", c, err) diff --git a/syft/source/location.go b/syft/source/location.go index ccc0aa002..3b4c6ff1e 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -3,21 +3,19 @@ package source import ( "fmt" - "github.com/mitchellh/hashstructure/v2" - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/internal/log" ) -var _ hashstructure.Hashable = (*Location)(nil) - // Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key // in content fetching to uniquely identify a file relative to a request (the VirtualPath). type Location struct { Coordinates - VirtualPath string // The path to the file which may or may not have hardlinks / symlinks - ref file.Reference // The file reference relative to the stereoscope.FileCatalog that has more information about this location. + // note: it is IMPORTANT to ignore anything but the coordinates for a Location when considering the ID (hash value) + // since the coordinates are the minimally correct ID for a location (symlinks should not come into play) + VirtualPath string `hash:"ignore"` // The path to the file which may or may not have hardlinks / symlinks + ref file.Reference `hash:"ignore"` // The file reference relative to the stereoscope.FileCatalog that has more information about this location. } // NewLocation creates a new Location representing a path without denoting a filesystem or FileCatalog reference. @@ -97,9 +95,3 @@ func (l Location) String() string { } return fmt.Sprintf("Location<%s>", str) } - -func (l Location) Hash() (uint64, error) { - // since location is part of the package definition it is important that only coordinates are used during object - // hashing. (Location hash should be a pass-through for the coordinates and not include ref or VirtualPath.) - return hashstructure.Hash(l.ID(), hashstructure.FormatV2, nil) -} diff --git a/syft/source/location_test.go b/syft/source/location_test.go new file mode 100644 index 000000000..eb3532058 --- /dev/null +++ b/syft/source/location_test.go @@ -0,0 +1,47 @@ +package source + +import ( + "testing" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/stretchr/testify/assert" +) + +func TestLocation_ID(t *testing.T) { + tests := []struct { + name string + coordinates Coordinates + virtualPath string + ref file.Reference + }{ + { + name: "coordinates should match location hash", + coordinates: Coordinates{ + RealPath: "path!", + FileSystemID: "filesystem!", + }, + }, + { + name: "coordinates should match location hash (with extra fields)", + coordinates: Coordinates{ + RealPath: "path!", + FileSystemID: "filesystem!", + }, + virtualPath: "virtualPath!", + ref: file.Reference{ + RealPath: "other-real-path!", + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + l := Location{ + Coordinates: test.coordinates, + VirtualPath: test.virtualPath, + ref: test.ref, + } + assert.Equal(t, l.ID(), test.coordinates.ID()) + }) + } + +}