Performance improvements around package ID (#698)

* set package ID in catalogers and improve hashing performance

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update setting ID + tests

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-12-16 08:55:53 -05:00 committed by GitHub
parent 727b84ce0d
commit a27907659d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 319 additions and 243 deletions

View File

@ -3,7 +3,7 @@
"name": "/some/path", "name": "/some/path",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-12-01T15:08:29.469369Z", "created": "2021-12-15T23:56:14.459753Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
@ -11,10 +11,10 @@
"licenseListVersion": "3.15" "licenseListVersion": "3.15"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https://anchore.com/syft/dir/some/path-f4586501-2da6-4541-a8e9-232b32f25e9a", "documentNamespace": "https://anchore.com/syft/dir/some/path-7ed51d00-2c50-4c6d-aedc-271ed41009cb",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-2a115ac97d018a0e", "SPDXID": "SPDXRef-96e6e51fe8ba6d8b",
"name": "package-1", "name": "package-1",
"licenseConcluded": "MIT", "licenseConcluded": "MIT",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",
@ -36,7 +36,7 @@
"versionInfo": "1.0.1" "versionInfo": "1.0.1"
}, },
{ {
"SPDXID": "SPDXRef-5e920b2bece2c3ae", "SPDXID": "SPDXRef-ad3d1c4abd84bf75",
"name": "package-2", "name": "package-2",
"licenseConcluded": "NONE", "licenseConcluded": "NONE",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",

View File

@ -3,7 +3,7 @@
"name": "user-image-input", "name": "user-image-input",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-12-01T15:08:29.476498Z", "created": "2021-12-15T23:56:14.468453Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
@ -11,10 +11,10 @@
"licenseListVersion": "3.15" "licenseListVersion": "3.15"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https://anchore.com/syft/image/user-image-input-e3b7637c-9b2f-4005-a683-58e60f979082", "documentNamespace": "https://anchore.com/syft/image/user-image-input-f7c12e3a-8390-4f0d-a4a9-7d756e7e8d7d",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-888661d4f0362f02", "SPDXID": "SPDXRef-b8995af4e6171091",
"name": "package-1", "name": "package-1",
"licenseConcluded": "MIT", "licenseConcluded": "MIT",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",
@ -36,7 +36,7 @@
"versionInfo": "1.0.1" "versionInfo": "1.0.1"
}, },
{ {
"SPDXID": "SPDXRef-4068ff5e8926b305", "SPDXID": "SPDXRef-73f796c846875b9e",
"name": "package-2", "name": "package-2",
"licenseConcluded": "NONE", "licenseConcluded": "NONE",
"downloadLocation": "NOASSERTION", "downloadLocation": "NOASSERTION",

View File

@ -8,7 +8,7 @@ import (
) )
func encoder(output io.Writer, s sbom.SBOM) error { func encoder(output io.Writer, s sbom.SBOM) error {
doc := ToFormatModel(s) doc := toFormatModel(s)
enc := json.NewEncoder(output) enc := json.NewEncoder(output)
// prevent > and < from being escaped in the payload // prevent > and < from being escaped in the payload

View File

@ -4,9 +4,10 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
) )
// Package represents a pkg.Package object specialized for JSON marshaling and unmarshalling. // Package represents a pkg.Package object specialized for JSON marshaling and unmarshalling.

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "2a115ac97d018a0e", "id": "96e6e51fe8ba6d8b",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -36,7 +36,7 @@
} }
}, },
{ {
"id": "5e920b2bece2c3ae", "id": "ad3d1c4abd84bf75",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "962403cfb7be50d7", "id": "2a5c2dadd6f80c07",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -31,7 +31,7 @@
} }
}, },
{ {
"id": "b11f44847bba0ed1", "id": "ac462e450060da2c",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",
@ -62,8 +62,8 @@
], ],
"artifactRelationships": [ "artifactRelationships": [
{ {
"parent": "962403cfb7be50d7", "parent": "",
"child": "b11f44847bba0ed1", "child": "",
"type": "ownership-by-file-overlap", "type": "ownership-by-file-overlap",
"metadata": { "metadata": {
"file": "path" "file": "path"

View File

@ -1,7 +1,7 @@
{ {
"artifacts": [ "artifacts": [
{ {
"id": "888661d4f0362f02", "id": "b8995af4e6171091",
"name": "package-1", "name": "package-1",
"version": "1.0.1", "version": "1.0.1",
"type": "python", "type": "python",
@ -32,7 +32,7 @@
} }
}, },
{ {
"id": "4068ff5e8926b305", "id": "73f796c846875b9e",
"name": "package-2", "name": "package-2",
"version": "2.0.1", "version": "2.0.1",
"type": "deb", "type": "deb",

View File

@ -19,8 +19,7 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
// TODO: this is exported for the use of the power-user command (temp) func toFormatModel(s sbom.SBOM) model.Document {
func ToFormatModel(s sbom.SBOM) model.Document {
src, err := toSourceModel(s.Source) src, err := toSourceModel(s.Source)
if err != nil { if err != nil {
log.Warnf("unable to create syft-json source object: %+v", err) log.Warnf("unable to create syft-json source object: %+v", err)

View File

@ -13,7 +13,7 @@ type Identifiable interface {
ID() ID ID() ID
} }
func IDFromHash(obj interface{}) (ID, error) { func IDByHash(obj interface{}) (ID, error) {
f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{ f, err := hashstructure.Hash(obj, hashstructure.FormatV2, &hashstructure.HashOptions{
ZeroNil: true, ZeroNil: true,
SlicesAsSets: true, SlicesAsSets: true,

View File

@ -9,8 +9,8 @@ type CargoPackageMetadata struct {
} }
// Pkg returns the standard `pkg.Package` representation of the package referenced within the Cargo.lock metadata. // Pkg returns the standard `pkg.Package` representation of the package referenced within the Cargo.lock metadata.
func (p CargoPackageMetadata) Pkg() Package { func (p CargoPackageMetadata) Pkg() *Package {
return Package{ return &Package{
Name: p.Name, Name: p.Name,
Version: p.Version, Version: p.Version,
Language: Rust, Language: Rust,

View File

@ -51,6 +51,7 @@ func (c *Catalog) Package(id artifact.ID) *Package {
log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err) log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err)
return nil return nil
} }
p.id = v.id
return &p return &p
} }
@ -75,8 +76,12 @@ func (c *Catalog) Add(p Package) {
c.lock.Lock() c.lock.Lock()
defer c.lock.Unlock() defer c.lock.Unlock()
// note: since we are capturing the ID, we cannot modify the package being added from this point forward
id := p.ID() id := p.ID()
if id == "" {
log.Warnf("found package with empty ID while adding to the catalog: %+v", p)
p.SetID()
id = p.ID()
}
// store by package ID // store by package ID
c.byID[id] = p c.byID[id] = p
@ -142,7 +147,7 @@ func (c *Catalog) Sorted(types ...Type) (pkgs []Package) {
sort.SliceStable(pkgs, func(i, j int) bool { sort.SliceStable(pkgs, func(i, j int) bool {
if pkgs[i].Name == pkgs[j].Name { if pkgs[i].Name == pkgs[j].Name {
if pkgs[i].Version == pkgs[j].Version { if pkgs[i].Version == pkgs[j].Version {
if pkgs[i].Type == pkgs[j].Type { if pkgs[i].Type == pkgs[j].Type && len(pkgs[i].Locations) > 0 && len(pkgs[j].Locations) > 0 {
return pkgs[i].Locations[0].String() < pkgs[j].Locations[0].String() return pkgs[i].Locations[0].String() < pkgs[j].Locations[0].String()
} }
return pkgs[i].Type < pkgs[j].Type return pkgs[i].Type < pkgs[j].Type

View File

@ -10,23 +10,6 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
var catalogAddAndRemoveTestPkgs = []Package{
{
Locations: []source.Location{
source.NewVirtualLocation("/a/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/bee/path"),
},
Type: RpmPkg,
},
{
Locations: []source.Location{
source.NewVirtualLocation("/c/path", "/another/path"),
source.NewVirtualLocation("/d/path", "/another/path"),
},
Type: NpmPkg,
},
}
type expectedIndexes struct { type expectedIndexes struct {
byType map[Type]*strset.Set byType map[Type]*strset.Set
byPath map[string]*strset.Set byPath map[string]*strset.Set
@ -34,18 +17,38 @@ type expectedIndexes struct {
func TestCatalogAddPopulatesIndex(t *testing.T) { func TestCatalogAddPopulatesIndex(t *testing.T) {
var pkgs = []Package{
{
Locations: []source.Location{
source.NewVirtualLocation("/a/path", "/another/path"),
source.NewVirtualLocation("/b/path", "/bee/path"),
},
Type: RpmPkg,
},
{
Locations: []source.Location{
source.NewVirtualLocation("/c/path", "/another/path"),
source.NewVirtualLocation("/d/path", "/another/path"),
},
Type: NpmPkg,
},
}
for i := range pkgs {
p := &pkgs[i]
p.SetID()
}
fixtureID := func(i int) string { fixtureID := func(i int) string {
return string(catalogAddAndRemoveTestPkgs[i].ID()) return string(pkgs[i].ID())
} }
tests := []struct { tests := []struct {
name string name string
pkgs []Package
expectedIndexes expectedIndexes expectedIndexes expectedIndexes
}{ }{
{ {
name: "vanilla-add", name: "vanilla-add",
pkgs: catalogAddAndRemoveTestPkgs,
expectedIndexes: expectedIndexes{ expectedIndexes: expectedIndexes{
byType: map[Type]*strset.Set{ byType: map[Type]*strset.Set{
RpmPkg: strset.New(fixtureID(0)), RpmPkg: strset.New(fixtureID(0)),
@ -65,7 +68,7 @@ func TestCatalogAddPopulatesIndex(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
c := NewCatalog(test.pkgs...) c := NewCatalog(pkgs...)
assertIndexes(t, c, test.expectedIndexes) assertIndexes(t, c, test.expectedIndexes)
@ -75,9 +78,7 @@ func TestCatalogAddPopulatesIndex(t *testing.T) {
func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
// assert path index // assert path index
if len(c.idsByPath) != len(expectedIndexes.byPath) { assert.Len(t, c.idsByPath, len(expectedIndexes.byPath), "unexpected path index length")
t.Errorf("unexpected path index length: %d != %d", len(c.idsByPath), len(expectedIndexes.byPath))
}
for path, expectedIds := range expectedIndexes.byPath { for path, expectedIds := range expectedIndexes.byPath {
actualIds := strset.New() actualIds := strset.New()
for _, p := range c.PackagesByPath(path) { for _, p := range c.PackagesByPath(path) {
@ -90,9 +91,7 @@ func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) {
} }
// assert type index // assert type index
if len(c.idsByType) != len(expectedIndexes.byType) { assert.Len(t, c.idsByType, len(expectedIndexes.byType), "unexpected type index length")
t.Errorf("unexpected type index length: %d != %d", len(c.idsByType), len(expectedIndexes.byType))
}
for ty, expectedIds := range expectedIndexes.byType { for ty, expectedIds := range expectedIndexes.byType {
actualIds := strset.New() actualIds := strset.New()
for p := range c.Enumerate(ty) { for p := range c.Enumerate(ty) {

View File

@ -21,14 +21,25 @@ import (
// integrity check // integrity check
var _ common.ParserFn = parseApkDB var _ common.ParserFn = parseApkDB
func newApkDBPackage(d *pkg.ApkMetadata) *pkg.Package {
return &pkg.Package{
Name: d.Package,
Version: d.Version,
Licenses: strings.Split(d.License, " "),
Type: pkg.ApkPkg,
MetadataType: pkg.ApkMetadataType,
Metadata: *d,
}
}
// parseApkDb parses individual packages from a given Alpine DB file. For more information on specific fields // parseApkDb parses individual packages from a given Alpine DB file. For more information on specific fields
// see https://wiki.alpinelinux.org/wiki/Apk_spec . // see https://wiki.alpinelinux.org/wiki/Apk_spec .
func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseApkDB(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
// larger capacity for the scanner. // larger capacity for the scanner.
const maxScannerCapacity = 1024 * 1024 const maxScannerCapacity = 1024 * 1024
// a new larger buffer for the scanner // a new larger buffer for the scanner
bufScan := make([]byte, maxScannerCapacity) bufScan := make([]byte, maxScannerCapacity)
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
scanner.Buffer(bufScan, maxScannerCapacity) scanner.Buffer(bufScan, maxScannerCapacity)
@ -52,14 +63,7 @@ func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relations
return nil, nil, err return nil, nil, err
} }
if metadata != nil { if metadata != nil {
packages = append(packages, pkg.Package{ packages = append(packages, newApkDBPackage(metadata))
Name: metadata.Package,
Version: metadata.Version,
Licenses: strings.Split(metadata.License, " "),
Type: pkg.ApkPkg,
MetadataType: pkg.ApkMetadataType,
Metadata: *metadata,
})
} }
} }

View File

@ -647,11 +647,11 @@ func TestSinglePackageDetails(t *testing.T) {
func TestMultiplePackages(t *testing.T) { func TestMultiplePackages(t *testing.T) {
tests := []struct { tests := []struct {
fixture string fixture string
expected []pkg.Package expected []*pkg.Package
}{ }{
{ {
fixture: "test-fixtures/multiple", fixture: "test-fixtures/multiple",
expected: []pkg.Package{ expected: []*pkg.Package{
{ {
Name: "libc-utils", Name: "libc-utils",
Version: "0.7.2-r0", Version: "0.7.2-r0",

View File

@ -49,10 +49,10 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
// perform analysis, accumulating errors for each failed analysis // perform analysis, accumulating errors for each failed analysis
var errs error var errs error
for _, theCataloger := range catalogers { for _, c := range catalogers {
// find packages from the underlying raw data // find packages from the underlying raw data
log.Debugf("cataloging with %q", theCataloger.Name()) log.Debugf("cataloging with %q", c.Name())
packages, relationships, err := theCataloger.Catalog(resolver) packages, relationships, err := c.Catalog(resolver)
if err != nil { if err != nil {
errs = multierror.Append(errs, err) errs = multierror.Append(errs, err)
continue continue
@ -64,10 +64,10 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
packagesDiscovered.N += int64(catalogedPackages) packagesDiscovered.N += int64(catalogedPackages)
for _, p := range packages { for _, p := range packages {
// generate CPEs // generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpe.Generate(p) p.CPEs = cpe.Generate(p)
// generate PURL // generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = generatePackageURL(p, theDistro) p.PURL = generatePackageURL(p, theDistro)
// create file-to-package relationships for files owned by the package // create file-to-package relationships for files owned by the package

View File

@ -45,22 +45,23 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
contentReader, err := resolver.FileContentsByLocation(location) contentReader, err := resolver.FileContentsByLocation(location)
if err != nil { if err != nil {
// TODO: fail or log? // TODO: fail or log?
return nil, nil, fmt.Errorf("unable to fetch contents for location=%v : %w", location, err) return nil, nil, fmt.Errorf("unable to fetch contents at location=%v: %w", location, err)
} }
discoveredPackages, discoveredRelationships, err := parser(location.RealPath, contentReader) discoveredPackages, discoveredRelationships, err := parser(location.RealPath, contentReader)
internal.CloseAndLogError(contentReader, location.VirtualPath) internal.CloseAndLogError(contentReader, location.VirtualPath)
if err != nil { if err != nil {
// TODO: should we fail? or only log? // TODO: should we fail? or only log?
log.Warnf("cataloger '%s' failed to parse entries (location=%+v): %+v", c.upstreamCataloger, location, err) log.Warnf("cataloger '%s' failed to parse entries at location=%+v: %+v", c.upstreamCataloger, location, err)
continue continue
} }
for _, p := range discoveredPackages { for _, p := range discoveredPackages {
p.FoundBy = c.upstreamCataloger p.FoundBy = c.upstreamCataloger
p.Locations = append(p.Locations, location) p.Locations = append(p.Locations, location)
p.SetID()
packages = append(packages, p) packages = append(packages, *p)
} }
relationships = append(relationships, discoveredRelationships...) relationships = append(relationships, discoveredRelationships...)

View File

@ -13,12 +13,12 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
func parser(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parser(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
contents, err := ioutil.ReadAll(reader) contents, err := ioutil.ReadAll(reader)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return []pkg.Package{ return []*pkg.Package{
{ {
Name: string(contents), Name: string(contents),
}, },

View File

@ -8,4 +8,4 @@ import (
) )
// ParserFn standardizes a function signature for parser functions that accept the virtual file path (not usable for file reads) and contents and return any discovered packages from that file // ParserFn standardizes a function signature for parser functions that accept the virtual file path (not usable for file reads) and contents and return any discovered packages from that file
type ParserFn func(string, io.Reader) ([]pkg.Package, []artifact.Relationship, error) type ParserFn func(string, io.Reader) ([]*pkg.Package, []artifact.Relationship, error)

View File

@ -68,6 +68,8 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti
// fetch additional data from the copyright file to derive the license information // fetch additional data from the copyright file to derive the license information
addLicenses(resolver, dbLocation, p) addLicenses(resolver, dbLocation, p)
p.SetID()
} }
allPackages = append(allPackages, pkgs...) allPackages = append(allPackages, pkgs...)

View File

@ -20,6 +20,16 @@ var (
sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
) )
func newDpkgPackage(d pkg.DpkgMetadata) pkg.Package {
return pkg.Package{
Name: d.Package,
Version: d.Version,
Type: pkg.DebPkg,
MetadataType: pkg.DpkgMetadataType,
Metadata: d,
}
}
// parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) {
buffedReader := bufio.NewReader(reader) buffedReader := bufio.NewReader(reader)
@ -37,13 +47,7 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) {
} }
if entry.Package != "" { if entry.Package != "" {
packages = append(packages, pkg.Package{ packages = append(packages, newDpkgPackage(entry))
Name: entry.Package,
Version: entry.Version,
Type: pkg.DebPkg,
MetadataType: pkg.DpkgMetadataType,
Metadata: entry,
})
} }
} }

View File

@ -17,6 +17,28 @@ const (
type exeOpener func(file io.ReadCloser) ([]exe, error) type exeOpener func(file io.ReadCloser) ([]exe, error)
func newGoBinaryPackage(name, version, h1Digest, goVersion, architecture string, location source.Location) pkg.Package {
p := pkg.Package{
Name: name,
Version: version,
Language: pkg.Go,
Type: pkg.GoModulePkg,
Locations: []source.Location{
location,
},
MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goVersion,
H1Digest: h1Digest,
Architecture: architecture,
},
}
p.SetID()
return p
}
func parseGoBin(location source.Location, reader io.ReadCloser, opener exeOpener) (pkgs []pkg.Package, err error) { func parseGoBin(location source.Location, reader io.ReadCloser, opener exeOpener) (pkgs []pkg.Package, err error) {
var exes []exe var exes []exe
// it has been found that there are stdlib paths within openExe that can panic. We want to prevent this behavior // it has been found that there are stdlib paths within openExe that can panic. We want to prevent this behavior
@ -54,21 +76,10 @@ func buildGoPkgInfo(location source.Location, mod, goVersion, arch string) []pkg
} }
if fields[0] == packageIdentifier || fields[0] == replaceIdentifier { if fields[0] == packageIdentifier || fields[0] == replaceIdentifier {
pkgsSlice = append(pkgsSlice, pkg.Package{ name := fields[1]
Name: fields[1], version := fields[2]
Version: fields[2], h1Digest := fields[3]
Language: pkg.Go, pkgsSlice = append(pkgsSlice, newGoBinaryPackage(name, version, h1Digest, goVersion, arch, location))
Type: pkg.GoModulePkg,
Locations: []source.Location{
location,
},
MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{
GoCompiledVersion: goVersion,
H1Digest: fields[3],
Architecture: arch,
},
})
} }
} }

View File

@ -147,6 +147,10 @@ func TestBuildGoPkgInfo(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
for i := range test.expected {
p := &test.expected[i]
p.SetID()
}
location := source.Location{ location := source.Location{
Coordinates: source.Coordinates{ Coordinates: source.Coordinates{
RealPath: "/a-path", RealPath: "/a-path",

View File

@ -12,8 +12,8 @@ import (
) )
// parseGoMod takes a go.mod and lists all packages discovered. // parseGoMod takes a go.mod and lists all packages discovered.
func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseGoMod(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make(map[string]pkg.Package) packages := make(map[string]*pkg.Package)
contents, err := ioutil.ReadAll(reader) contents, err := ioutil.ReadAll(reader)
if err != nil { if err != nil {
@ -26,7 +26,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati
} }
for _, m := range file.Require { for _, m := range file.Require {
packages[m.Mod.Path] = pkg.Package{ packages[m.Mod.Path] = &pkg.Package{
Name: m.Mod.Path, Name: m.Mod.Path,
Version: m.Mod.Version, Version: m.Mod.Version,
Language: pkg.Go, Language: pkg.Go,
@ -36,7 +36,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati
// remove any old packages and replace with new ones... // remove any old packages and replace with new ones...
for _, m := range file.Replace { for _, m := range file.Replace {
packages[m.New.Path] = pkg.Package{ packages[m.New.Path] = &pkg.Package{
Name: m.New.Path, Name: m.New.Path,
Version: m.New.Version, Version: m.New.Version,
Language: pkg.Go, Language: pkg.Go,
@ -49,7 +49,7 @@ func parseGoMod(path string, reader io.Reader) ([]pkg.Package, []artifact.Relati
delete(packages, m.Mod.Path) delete(packages, m.Mod.Path)
} }
pkgsSlice := make([]pkg.Package, len(packages)) pkgsSlice := make([]*pkg.Package, len(packages))
idx := 0 idx := 0
for _, p := range packages { for _, p := range packages {
pkgsSlice[idx] = p pkgsSlice[idx] = p

View File

@ -87,7 +87,7 @@ func TestParseGoMod(t *testing.T) {
continue continue
} }
diffs := deep.Equal(a, e) diffs := deep.Equal(a, &e)
if len(diffs) > 0 { if len(diffs) > 0 {
t.Errorf("diffs found for %q", a.Name) t.Errorf("diffs found for %q", a.Name)
for _, d := range diffs { for _, d := range diffs {

View File

@ -35,7 +35,7 @@ type archiveParser struct {
} }
// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. // parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func parseJavaArchive(virtualPath string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(virtualPath, reader, true) parser, cleanupFn, err := newJavaArchiveParser(virtualPath, reader, true)
// note: even on error, we should always run cleanup functions // note: even on error, we should always run cleanup functions
defer cleanupFn() defer cleanupFn()
@ -81,8 +81,8 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo
} }
// parse the loaded archive and return all packages found. // parse the loaded archive and return all packages found.
func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error) { func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []*pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship
// find the parent package from the java manifest // find the parent package from the java manifest
@ -110,7 +110,7 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error)
// lastly, add the parent package to the list (assuming the parent exists) // lastly, add the parent package to the list (assuming the parent exists)
if parentPkg != nil { if parentPkg != nil {
pkgs = append([]pkg.Package{*parentPkg}, pkgs...) pkgs = append([]*pkg.Package{parentPkg}, pkgs...)
} }
return pkgs, relationships, nil return pkgs, relationships, nil
@ -158,12 +158,12 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// parent package, returning all listed Java packages found for each pom // parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new // properties discovered and potentially updating the given parentPkg with new
// data. // data.
func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([]pkg.Package, error) { func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([]*pkg.Package, error) {
if parentPkg == nil { if parentPkg == nil {
return nil, nil return nil, nil
} }
var pkgs []pkg.Package var pkgs []*pkg.Package
properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath) properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
if err != nil { if err != nil {
@ -183,7 +183,7 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.virtualPath) pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.virtualPath)
if pkgFromPom != nil { if pkgFromPom != nil {
pkgs = append(pkgs, *pkgFromPom) pkgs = append(pkgs, pkgFromPom)
} }
} }
@ -192,8 +192,8 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and // discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and
// associating each discovered package to the given parent package. // associating each discovered package to the given parent package.
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []*pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship
// search and parse pom.properties files & fetch the contents // search and parse pom.properties files & fetch the contents

View File

@ -257,7 +257,7 @@ func TestParseJar(t *testing.T) {
var parent *pkg.Package var parent *pkg.Package
for _, a := range actual { for _, a := range actual {
if strings.Contains(a.Name, "example-") { if strings.Contains(a.Name, "example-") {
parent = &a parent = a
} }
} }
@ -292,7 +292,7 @@ func TestParseJar(t *testing.T) {
// write censored data back // write censored data back
a.Metadata = metadata a.Metadata = metadata
diffs := deep.Equal(e, a) diffs := deep.Equal(&e, a)
if len(diffs) > 0 { if len(diffs) > 0 {
t.Errorf("diffs found for %q", a.Name) t.Errorf("diffs found for %q", a.Name)
for _, d := range diffs { for _, d := range diffs {
@ -527,7 +527,7 @@ func TestParseNestedJar(t *testing.T) {
actualNameVersionPairSet := internal.NewStringSet() actualNameVersionPairSet := internal.NewStringSet()
for _, a := range actual { for _, a := range actual {
key := makeKey(&a) key := makeKey(a)
actualNameVersionPairSet.Add(key) actualNameVersionPairSet.Add(key)
if !expectedNameVersionPairSet.Contains(key) { if !expectedNameVersionPairSet.Contains(key) {
t.Errorf("extra package: %s", a) t.Errorf("extra package: %s", a)
@ -545,7 +545,7 @@ func TestParseNestedJar(t *testing.T) {
} }
for _, a := range actual { for _, a := range actual {
actualKey := makeKey(&a) actualKey := makeKey(a)
metadata := a.Metadata.(pkg.JavaMetadata) metadata := a.Metadata.(pkg.JavaMetadata)
if actualKey == "spring-boot|0.0.1-SNAPSHOT" { if actualKey == "spring-boot|0.0.1-SNAPSHOT" {

View File

@ -50,7 +50,6 @@ type Repository struct {
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me" // ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`) var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
// Exports Author.UnmarshalJSON interface to help normalize the json structure.
func (a *Author) UnmarshalJSON(b []byte) error { func (a *Author) UnmarshalJSON(b []byte) error {
var authorStr string var authorStr string
var fields map[string]string var fields map[string]string
@ -135,7 +134,7 @@ func licenseFromJSON(b []byte) (string, error) {
return "", errors.New("unable to unmarshal license field as either string or object") return "", errors.New("unable to unmarshal license field as either string or object")
} }
func licensesFromJSON(p PackageJSON) ([]string, error) { func (p PackageJSON) licensesFromJSON() ([]string, error) {
if p.License == nil && p.Licenses == nil { if p.License == nil && p.Licenses == nil {
// This package.json doesn't specify any licenses whatsoever // This package.json doesn't specify any licenses whatsoever
return []string{}, nil return []string{}, nil
@ -163,8 +162,8 @@ func licensesFromJSON(p PackageJSON) ([]string, error) {
} }
// parsePackageJSON parses a package.json and returns the discovered JavaScript packages. // parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package var packages []*pkg.Package
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
@ -180,30 +179,34 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Rel
return nil, nil, nil return nil, nil, nil
} }
licenses, err := licensesFromJSON(p) packages = append(packages, newPackageJSONPackage(p))
if err != nil {
return nil, nil, fmt.Errorf("failed to parse package.json file: %w", err)
}
packages = append(packages, pkg.Package{
Name: p.Name,
Version: p.Version,
Licenses: licenses,
Language: pkg.JavaScript,
Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Author: p.Author.AuthorString(),
Homepage: p.Homepage,
URL: p.Repository.URL,
Licenses: licenses,
},
})
} }
return packages, nil, nil return packages, nil, nil
} }
// newPackageJSONPackage converts a decoded package.json document into a JavaScript npm package.
// A failure to extract licenses is only logged as a warning; the package is still returned, carrying
// whatever license value licensesFromJSON produced.
func newPackageJSONPackage(p PackageJSON) *pkg.Package {
	licenses, err := p.licensesFromJSON()
	if err != nil {
		log.Warnf("unable to extract licenses from javascript package.json: %+v", err)
	}

	// the same license list is recorded on both the package and its npm-specific metadata
	metadata := pkg.NpmPackageJSONMetadata{
		Author:   p.Author.AuthorString(),
		Homepage: p.Homepage,
		URL:      p.Repository.URL,
		Licenses: licenses,
	}

	result := pkg.Package{
		Name:         p.Name,
		Version:      p.Version,
		Licenses:     licenses,
		Language:     pkg.JavaScript,
		Type:         pkg.NpmPkg,
		MetadataType: pkg.NpmPackageJSONMetadataType,
		Metadata:     metadata,
	}
	return &result
}
func (p PackageJSON) hasNameAndVersionValues() bool { func (p PackageJSON) hasNameAndVersionValues() bool {
return p.Name != "" && p.Version != "" return p.Name != "" && p.Version != ""
} }

View File

@ -136,7 +136,7 @@ func TestParsePackageJSON(t *testing.T) {
t.Fatalf("unexpected package count: %d!=1", len(actual)) t.Fatalf("unexpected package count: %d!=1", len(actual))
} }
for _, d := range deep.Equal(actual[0], test.ExpectedPkg) { for _, d := range deep.Equal(actual[0], &test.ExpectedPkg) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }

View File

@ -29,14 +29,14 @@ type Dependency struct {
} }
// parsePackageLock parses a package-lock.json and returns the discovered JavaScript packages. // parsePackageLock parses a package-lock.json and returns the discovered JavaScript packages.
func parsePackageLock(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parsePackageLock(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
// in the case we find package-lock.json files in the node_modules directories, skip those // in the case we find package-lock.json files in the node_modules directories, skip those
// as the whole purpose of the lock file is for the specific dependencies of the root project // as the whole purpose of the lock file is for the specific dependencies of the root project
if pathContainsNodeModulesDirectory(path) { if pathContainsNodeModulesDirectory(path) {
return nil, nil, nil return nil, nil, nil
} }
var packages []pkg.Package var packages []*pkg.Package
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
@ -47,7 +47,7 @@ func parsePackageLock(path string, reader io.Reader) ([]pkg.Package, []artifact.
return nil, nil, fmt.Errorf("failed to parse package-lock.json file: %w", err) return nil, nil, fmt.Errorf("failed to parse package-lock.json file: %w", err)
} }
for name, pkgMeta := range lock.Dependencies { for name, pkgMeta := range lock.Dependencies {
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: name, Name: name,
Version: pkgMeta.Version, Version: pkgMeta.Version,
Language: pkg.JavaScript, Language: pkg.JavaScript,

View File

@ -4,10 +4,12 @@ import (
"os" "os"
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { func assertPkgsEqual(t *testing.T, actual []*pkg.Package, expected map[string]pkg.Package) {
t.Helper() t.Helper()
if len(actual) != len(expected) { if len(actual) != len(expected) {
for _, a := range actual { for _, a := range actual {
@ -18,26 +20,11 @@ func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg
for _, a := range actual { for _, a := range actual {
expectedPkg, ok := expected[a.Name] expectedPkg, ok := expected[a.Name]
if !ok { assert.True(t, ok)
t.Errorf("unexpected package found: '%s'", a.Name) assert.Equal(t, expectedPkg.Version, a.Version, "bad version")
} assert.Equal(t, expectedPkg.Language, a.Language, "bad language")
assert.Equal(t, expectedPkg.Type, a.Type, "bad type")
if expectedPkg.Version != a.Version { assert.Equal(t, expectedPkg.Licenses, a.Licenses, "bad license count")
t.Errorf("%s : unexpected package version: '%s', expected: '%s'", a.Name, a.Version, expectedPkg.Version)
}
if a.Language != expectedPkg.Language {
t.Errorf("%s : bad language: '%+v', expected: '%+v'", a.Name, a.Language, expectedPkg.Language)
}
if a.Type != expectedPkg.Type {
t.Errorf("%s : bad package type: %+v, expected: %+v", a.Name, a.Type, expectedPkg.Type)
}
if len(a.Licenses) < len(expectedPkg.Licenses) {
t.Errorf("%s : bad package licenses count: '%+v'", a.Name, a.Licenses)
}
} }
} }

View File

@ -35,14 +35,14 @@ const (
noVersion = "" noVersion = ""
) )
func parseYarnLock(path string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseYarnLock(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
// in the case we find yarn.lock files in the node_modules directories, skip those // in the case we find yarn.lock files in the node_modules directories, skip those
// as the whole purpose of the lock file is for the specific dependencies of the project // as the whole purpose of the lock file is for the specific dependencies of the project
if pathContainsNodeModulesDirectory(path) { if pathContainsNodeModulesDirectory(path) {
return nil, nil, nil return nil, nil, nil
} }
var packages []pkg.Package var packages []*pkg.Package
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
parsedPackages := internal.NewStringSet() parsedPackages := internal.NewStringSet()
currentPackage := noPackage currentPackage := noPackage
@ -106,8 +106,8 @@ func findPackageVersion(line string) string {
return noVersion return noVersion
} }
func newYarnLockPackage(name, version string) pkg.Package { func newYarnLockPackage(name, version string) *pkg.Package {
return pkg.Package{ return &pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Language: pkg.JavaScript, Language: pkg.JavaScript,

View File

@ -8,7 +8,6 @@ import (
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
type ComposerLock struct { type ComposerLock struct {
@ -21,12 +20,9 @@ type Dependency struct {
Version string `json:"version"` Version string `json:"version"`
} }
// integrity check
var _ common.ParserFn = parseComposerLock
// parseComposerLock is a parser function for Composer.lock contents, returning "Default" php packages discovered. // parseComposerLock is a parser function for Composer.lock contents, returning "Default" php packages discovered.
func parseComposerLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseComposerLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
@ -39,7 +35,7 @@ func parseComposerLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Re
for _, pkgMeta := range lock.Packages { for _, pkgMeta := range lock.Packages {
version := pkgMeta.Version version := pkgMeta.Version
name := pkgMeta.Name name := pkgMeta.Name
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Language: pkg.PHP, Language: pkg.PHP,

View File

@ -9,7 +9,7 @@ import (
) )
func TestParseComposerFileLock(t *testing.T) { func TestParseComposerFileLock(t *testing.T) {
expected := []pkg.Package{ expected := []*pkg.Package{
{ {
Name: "adoy/fastcgi-client", Name: "adoy/fastcgi-client",
Version: "1.0.2", Version: "1.0.2",

View File

@ -40,8 +40,8 @@ func (w *installedJSONComposerV2) UnmarshalJSON(data []byte) error {
var _ common.ParserFn = parseComposerLock var _ common.ParserFn = parseComposerLock
// parseInstalledJSON is a parser function for Composer installed.json contents, returning "Default" php packages discovered. // parseInstalledJSON is a parser function for Composer installed.json contents, returning "Default" php packages discovered.
func parseInstalledJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseInstalledJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
@ -54,7 +54,7 @@ func parseInstalledJSON(_ string, reader io.Reader) ([]pkg.Package, []artifact.R
for _, pkgMeta := range lock.Packages { for _, pkgMeta := range lock.Packages {
version := pkgMeta.Version version := pkgMeta.Version
name := pkgMeta.Name name := pkgMeta.Name
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Language: pkg.PHP, Language: pkg.PHP,

View File

@ -9,7 +9,7 @@ import (
) )
func TestParseInstalledJsonComposerV1(t *testing.T) { func TestParseInstalledJsonComposerV1(t *testing.T) {
expected := []pkg.Package{ expected := []*pkg.Package{
{ {
Name: "asm89/stack-cors", Name: "asm89/stack-cors",
Version: "1.3.0", Version: "1.3.0",
@ -41,7 +41,7 @@ func TestParseInstalledJsonComposerV1(t *testing.T) {
} }
func TestParseInstalledJsonComposerV2(t *testing.T) { func TestParseInstalledJsonComposerV2(t *testing.T) {
expected := []pkg.Package{ expected := []*pkg.Package{
{ {
Name: "asm89/stack-cors", Name: "asm89/stack-cors",
Version: "1.3.0", Version: "1.3.0",

View File

@ -74,7 +74,7 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad
licenses = []string{metadata.License} licenses = []string{metadata.License}
} }
return &pkg.Package{ p := &pkg.Package{
Name: metadata.Name, Name: metadata.Name,
Version: metadata.Version, Version: metadata.Version,
FoundBy: c.Name(), FoundBy: c.Name(),
@ -84,7 +84,11 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metad
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType, MetadataType: pkg.PythonPackageMetadataType,
Metadata: *metadata, Metadata: *metadata,
}, nil }
p.SetID()
return p, nil
} }
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.

View File

@ -38,8 +38,8 @@ type Dependency struct {
var _ common.ParserFn = parsePipfileLock var _ common.ParserFn = parsePipfileLock
// parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered. // parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered.
func parsePipfileLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
@ -51,7 +51,7 @@ func parsePipfileLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Rel
} }
for name, pkgMeta := range lock.Default { for name, pkgMeta := range lock.Default {
version := strings.TrimPrefix(pkgMeta.Version, "==") version := strings.TrimPrefix(pkgMeta.Version, "==")
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Language: pkg.Python, Language: pkg.Python,

View File

@ -14,7 +14,7 @@ import (
var _ common.ParserFn = parsePoetryLock var _ common.ParserFn = parsePoetryLock
// parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered. // parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered.
func parsePoetryLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parsePoetryLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
tree, err := toml.LoadReader(reader) tree, err := toml.LoadReader(reader)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %v", err) return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %v", err)

View File

@ -9,7 +9,7 @@ import (
) )
func TestParsePoetryLock(t *testing.T) { func TestParsePoetryLock(t *testing.T) {
expected := []pkg.Package{ expected := []*pkg.Package{
{ {
Name: "added-value", Name: "added-value",
Version: "0.14.2", Version: "0.14.2",

View File

@ -16,8 +16,8 @@ var _ common.ParserFn = parseRequirementsTxt
// parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
// specific version. // specific version.
func parseRequirementsTxt(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
for scanner.Scan() { for scanner.Scan() {
@ -44,7 +44,7 @@ func parseRequirementsTxt(_ string, reader io.Reader) ([]pkg.Package, []artifact
parts := strings.Split(uncommented, "==") parts := strings.Split(uncommented, "==")
name := strings.TrimSpace(parts[0]) name := strings.TrimSpace(parts[0])
version := strings.TrimSpace(parts[1]) version := strings.TrimSpace(parts[1])
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Language: pkg.Python, Language: pkg.Python,

View File

@ -4,12 +4,14 @@ import (
"os" "os"
"testing" "testing"
"github.com/stretchr/testify/assert"
"github.com/go-test/deep" "github.com/go-test/deep"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { func assertPackagesEqual(t *testing.T, actual []*pkg.Package, expected map[string]pkg.Package) {
t.Helper() t.Helper()
if len(actual) != len(expected) { if len(actual) != len(expected) {
for _, a := range actual { for _, a := range actual {
@ -20,11 +22,9 @@ func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string
for _, a := range actual { for _, a := range actual {
expectedPkg, ok := expected[a.Name] expectedPkg, ok := expected[a.Name]
if !ok { assert.True(t, ok)
t.Errorf("unexpected package found: '%s'", a.Name)
}
for _, d := range deep.Equal(a, expectedPkg) { for _, d := range deep.Equal(a, &expectedPkg) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }
} }

View File

@ -20,8 +20,8 @@ var _ common.ParserFn = parseSetup
// " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770) // " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770)
var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w\.]*)`) var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w\.]*)`)
func parseSetup(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]pkg.Package, 0) packages := make([]*pkg.Package, 0)
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
@ -38,7 +38,7 @@ func parseSetup(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relations
name = strings.TrimSpace(name) name = strings.TrimSpace(name)
version := strings.TrimSpace(parts[len(parts)-1]) version := strings.TrimSpace(parts[len(parts)-1])
packages = append(packages, pkg.Package{ packages = append(packages, &pkg.Package{
Name: strings.Trim(name, "'\""), Name: strings.Trim(name, "'\""),
Version: strings.Trim(version, "'\""), Version: strings.Trim(version, "'\""),
Language: pkg.Python, Language: pkg.Python,

View File

@ -7,8 +7,8 @@ type PoetryMetadata struct {
} }
// Pkgs returns all of the packages referenced within the poetry.lock metadata. // Pkgs returns all of the packages referenced within the poetry.lock metadata.
func (m PoetryMetadata) Pkgs() []pkg.Package { func (m PoetryMetadata) Pkgs() []*pkg.Package {
pkgs := make([]pkg.Package, 0) pkgs := make([]*pkg.Package, 0)
for _, p := range m.Packages { for _, p := range m.Packages {
pkgs = append(pkgs, p.Pkg()) pkgs = append(pkgs, p.Pkg())

View File

@ -11,8 +11,8 @@ type PoetryMetadataPackage struct {
} }
// Pkg returns the standard `pkg.Package` representation of the package referenced within the poetry.lock metadata. // Pkg returns the standard `pkg.Package` representation of the package referenced within the poetry.lock metadata.
func (p PoetryMetadataPackage) Pkg() pkg.Package { func (p PoetryMetadataPackage) Pkg() *pkg.Package {
return pkg.Package{ return &pkg.Package{
Name: p.Name, Name: p.Name,
Version: p.Version, Version: p.Version,
Language: pkg.Python, Language: pkg.Python,

View File

@ -70,6 +70,8 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re
Metadata: metadata, Metadata: metadata,
} }
p.SetID()
allPkgs = append(allPkgs, p) allPkgs = append(allPkgs, p)
} }

View File

@ -1,5 +1,5 @@
/* /*
Package bundler provides a concrete Cataloger implementation for Ruby Gemfile.lock bundler files. Package ruby bundler provides a concrete Cataloger implementation for Ruby Gemfile.lock bundler files.
*/ */
package ruby package ruby

View File

@ -17,8 +17,8 @@ var _ common.ParserFn = parseGemFileLockEntries
var sectionsOfInterest = internal.NewStringSetFromSlice([]string{"GEM"}) var sectionsOfInterest = internal.NewStringSetFromSlice([]string{"GEM"})
// parseGemFileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered. // parseGemFileLockEntries is a parser function for Gemfile.lock contents, returning all Gems discovered.
func parseGemFileLockEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseGemFileLockEntries(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
pkgs := make([]pkg.Package, 0) pkgs := make([]*pkg.Package, 0)
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
var currentSection string var currentSection string
@ -41,7 +41,7 @@ func parseGemFileLockEntries(_ string, reader io.Reader) ([]pkg.Package, []artif
if len(candidate) != 2 { if len(candidate) != 2 {
continue continue
} }
pkgs = append(pkgs, pkg.Package{ pkgs = append(pkgs, &pkg.Package{
Name: candidate[0], Name: candidate[0],
Version: strings.Trim(candidate[1], "()"), Version: strings.Trim(candidate[1], "()"),
Language: pkg.Ruby, Language: pkg.Ruby,

View File

@ -61,8 +61,8 @@ func processList(s string) []string {
return results return results
} }
func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseGemSpecEntries(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []*pkg.Package
var fields = make(map[string]interface{}) var fields = make(map[string]interface{})
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
@ -97,7 +97,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, []artifact.
return nil, nil, fmt.Errorf("unable to decode gem metadata: %w", err) return nil, nil, fmt.Errorf("unable to decode gem metadata: %w", err)
} }
pkgs = append(pkgs, pkg.Package{ pkgs = append(pkgs, &pkg.Package{
Name: metadata.Name, Name: metadata.Name,
Version: metadata.Version, Version: metadata.Version,
Licenses: metadata.Licenses, Licenses: metadata.Licenses,

View File

@ -44,7 +44,7 @@ func TestParseGemspec(t *testing.T) {
t.Fatalf("unexpected package count: %d!=1", len(actual)) t.Fatalf("unexpected package count: %d!=1", len(actual))
} }
for _, d := range deep.Equal(actual[0], expectedPkg) { for _, d := range deep.Equal(actual[0], &expectedPkg) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }
} }

View File

@ -7,8 +7,8 @@ type CargoMetadata struct {
} }
// Pkgs returns all of the packages referenced within the Cargo.lock metadata. // Pkgs returns all of the packages referenced within the Cargo.lock metadata.
func (m CargoMetadata) Pkgs() []pkg.Package { func (m CargoMetadata) Pkgs() []*pkg.Package {
pkgs := make([]pkg.Package, 0) pkgs := make([]*pkg.Package, 0)
for _, p := range m.Packages { for _, p := range m.Packages {
if p.Dependencies == nil { if p.Dependencies == nil {

View File

@ -14,7 +14,7 @@ import (
var _ common.ParserFn = parseCargoLock var _ common.ParserFn = parseCargoLock
// parseCargoLock is a parser function for Cargo.lock contents, returning all rust cargo crates discovered. // parseCargoLock is a parser function for Cargo.lock contents, returning all rust cargo crates discovered.
func parseCargoLock(_ string, reader io.Reader) ([]pkg.Package, []artifact.Relationship, error) { func parseCargoLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
tree, err := toml.LoadReader(reader) tree, err := toml.LoadReader(reader)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to load Cargo.lock for parsing: %v", err) return nil, nil, fmt.Errorf("unable to load Cargo.lock for parsing: %v", err)

View File

@ -9,7 +9,7 @@ import (
) )
func TestParseCargoLock(t *testing.T) { func TestParseCargoLock(t *testing.T) {
expected := []pkg.Package{ expected := []*pkg.Package{
{ {
Name: "ansi_term", Name: "ansi_term",
Version: "0.12.1", Version: "0.12.1",

View File

@ -14,6 +14,7 @@ import (
// Package represents an application or library that has been bundled into a distributable format. // Package represents an application or library that has been bundled into a distributable format.
// TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places? // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places?
type Package struct { type Package struct {
id artifact.ID `hash:"ignore"`
Name string // the package name Name string // the package name
Version string // the version of the package Version string // the version of the package
FoundBy string // the specific cataloger that discovered this package FoundBy string // the specific cataloger that discovered this package
@ -21,24 +22,27 @@ type Package struct {
Licenses []string // licenses discovered with the package metadata Licenses []string // licenses discovered with the package metadata
Language Language // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) Language Language // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
Type Type // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) Type Type // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
CPEs []CPE // all possible Common Platform Enumerators CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields)
PURL string // the Package URL (see https://github.com/package-url/purl-spec) PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) (note: this is NOT included in the definition of the ID since all fields on a pURL are derived from other fields)
MetadataType MetadataType // the shape of the additional data in the "metadata" field MetadataType MetadataType // the shape of the additional data in the "metadata" field
Metadata interface{} // additional data found while parsing the package source Metadata interface{} // additional data found while parsing the package source
} }
func (p Package) ID() artifact.ID { func (p *Package) SetID() {
f, err := artifact.IDFromHash(p) id, err := artifact.IDByHash(p)
if err != nil { if err != nil {
// TODO: what to do in this case? // TODO: what to do in this case?
log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err) log.Warnf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err)
return "" return
} }
p.id = id
}
return f func (p Package) ID() artifact.ID {
return p.id
} }
// Stringer to represent a package. // Stringer to represent a package.
func (p Package) String() string { func (p Package) String() string {
return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s)", p.Type, p.Name, p.Version) return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id)
} }

View File

@ -127,20 +127,20 @@ func TestFingerprint(t *testing.T) {
expectIdentical: false, expectIdentical: false,
}, },
{ {
name: "CPEs is reflected", name: "CPEs is ignored",
transform: func(pkg Package) Package { transform: func(pkg Package) Package {
pkg.CPEs = []CPE{} pkg.CPEs = []CPE{}
return pkg return pkg
}, },
expectIdentical: false, expectIdentical: true,
}, },
{ {
name: "pURL is reflected", name: "pURL is ignored",
transform: func(pkg Package) Package { transform: func(pkg Package) Package {
pkg.PURL = "new!" pkg.PURL = "new!"
return pkg return pkg
}, },
expectIdentical: false, expectIdentical: true,
}, },
{ {
name: "language is reflected", name: "language is reflected",
@ -190,7 +190,10 @@ func TestFingerprint(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
originalPkg.SetID()
transformedPkg := test.transform(originalPkg) transformedPkg := test.transform(originalPkg)
transformedPkg.SetID()
originalFingerprint := originalPkg.ID() originalFingerprint := originalPkg.ID()
assert.NotEmpty(t, originalFingerprint) assert.NotEmpty(t, originalFingerprint)
transformedFingerprint := transformedPkg.ID() transformedFingerprint := transformedPkg.ID()

View File

@ -27,11 +27,11 @@ func RelationshipsByFileOwnership(catalog *Catalog) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog) var relationships = findOwnershipByFilesRelationships(catalog)
var edges []artifact.Relationship var edges []artifact.Relationship
for parent, children := range relationships { for parentID, children := range relationships {
for child, files := range children { for childID, files := range children {
edges = append(edges, artifact.Relationship{ edges = append(edges, artifact.Relationship{
From: catalog.byID[parent], From: catalog.byID[parentID],
To: catalog.byID[child], To: catalog.byID[childID],
Type: artifact.OwnershipByFileOverlapRelationship, Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{ Data: ownershipByFilesMetadata{
Files: files.List(), Files: files.List(),

View File

@ -32,6 +32,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
}, },
} }
parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: []source.Location{
@ -40,6 +41,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
Type: NpmPkg, Type: NpmPkg,
} }
child.SetID()
relationship := artifact.Relationship{ relationship := artifact.Relationship{
From: parent, From: parent,
@ -73,6 +75,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
}, },
} }
parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: []source.Location{
@ -81,6 +84,7 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
Type: NpmPkg, Type: NpmPkg,
} }
child.SetID()
relationship := artifact.Relationship{ relationship := artifact.Relationship{
From: parent, From: parent,
@ -114,6 +118,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
} }
parent.SetID()
child := Package{ child := Package{
Locations: []source.Location{ Locations: []source.Location{
source.NewVirtualLocation("/c/path", "/another/path"), source.NewVirtualLocation("/c/path", "/another/path"),
@ -122,6 +128,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
Type: NpmPkg, Type: NpmPkg,
} }
child.SetID()
return []Package{parent, child}, nil return []Package{parent, child}, nil
}, },
}, },

View File

@ -27,7 +27,7 @@ func NewCoordinateSet(start ...Coordinates) CoordinateSet {
} }
func (c Coordinates) ID() artifact.ID { func (c Coordinates) ID() artifact.ID {
f, err := artifact.IDFromHash(c) f, err := artifact.IDByHash(c)
if err != nil { if err != nil {
// TODO: what to do in this case? // TODO: what to do in this case?
log.Warnf("unable to get fingerprint of location coordinate=%+v: %+v", c, err) log.Warnf("unable to get fingerprint of location coordinate=%+v: %+v", c, err)

View File

@ -3,21 +3,19 @@ package source
import ( import (
"fmt" "fmt"
"github.com/mitchellh/hashstructure/v2"
"github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/image" "github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
) )
var _ hashstructure.Hashable = (*Location)(nil)
// Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key // Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key
// in content fetching to uniquely identify a file relative to a request (the VirtualPath). // in content fetching to uniquely identify a file relative to a request (the VirtualPath).
type Location struct { type Location struct {
Coordinates Coordinates
VirtualPath string // The path to the file which may or may not have hardlinks / symlinks // note: it is IMPORTANT to ignore anything but the coordinates for a Location when considering the ID (hash value)
ref file.Reference // The file reference relative to the stereoscope.FileCatalog that has more information about this location. // since the coordinates are the minimally correct ID for a location (symlinks should not come into play)
VirtualPath string `hash:"ignore"` // The path to the file which may or may not have hardlinks / symlinks
ref file.Reference `hash:"ignore"` // The file reference relative to the stereoscope.FileCatalog that has more information about this location.
} }
// NewLocation creates a new Location representing a path without denoting a filesystem or FileCatalog reference. // NewLocation creates a new Location representing a path without denoting a filesystem or FileCatalog reference.
@ -97,9 +95,3 @@ func (l Location) String() string {
} }
return fmt.Sprintf("Location<%s>", str) return fmt.Sprintf("Location<%s>", str)
} }
// Hash computes the structural hash of a Location from the value returned by l.ID() alone.
// Because a Location is part of the package definition, only the coordinates may take part in object
// hashing; ref and VirtualPath are deliberately left out.
func (l Location) Hash() (uint64, error) {
	id := l.ID()
	return hashstructure.Hash(id, hashstructure.FormatV2, nil)
}

View File

@ -0,0 +1,47 @@
package source
import (
"testing"
"github.com/anchore/stereoscope/pkg/file"
"github.com/stretchr/testify/assert"
)
// TestLocation_ID checks that the ID reported by a Location equals the ID of its Coordinates,
// both when only the coordinates are set and when VirtualPath and ref are populated as well.
func TestLocation_ID(t *testing.T) {
	tests := []struct {
		name        string
		coordinates Coordinates
		virtualPath string
		ref         file.Reference
	}{
		{
			name: "coordinates should match location hash",
			coordinates: Coordinates{
				RealPath:     "path!",
				FileSystemID: "filesystem!",
			},
		},
		{
			name: "coordinates should match location hash (with extra fields)",
			coordinates: Coordinates{
				RealPath:     "path!",
				FileSystemID: "filesystem!",
			},
			virtualPath: "virtualPath!",
			ref: file.Reference{
				RealPath: "other-real-path!",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			l := Location{
				Coordinates: test.coordinates,
				VirtualPath: test.virtualPath,
				ref:         test.ref,
			}
			// assert.Equal takes (expected, actual): the coordinates ID is the reference value,
			// so a failure message labels the two sides correctly.
			assert.Equal(t, test.coordinates.ID(), l.ID())
		})
	}
}