From caff67289aead9a0ed1ec05c8534c3c440669e42 Mon Sep 17 00:00:00 2001 From: Jonas Xavier Date: Fri, 3 Jun 2022 10:17:43 -0700 Subject: [PATCH] Add filters to package cataloger (#1021) * Add filters to package cataloger This PR adds filters so a package without name or version doesn't go in the list of all discovered packages. Integration and cli tests were added to validate the feature. Signed-off-by: Jonas Xavier * add nolint:funlen to cataloger/catalog.go Signed-off-by: Jonas Xavier * don't require package version Signed-off-by: Jonas Xavier * add package filtering to generic and python cataloger also removes cli tests in favor of integration and unit tests Signed-off-by: Jonas Xavier * drop nolint:funlen Signed-off-by: Jonas Xavier * check for no-removal operation Signed-off-by: Jonas Xavier * remove unused fixtures Signed-off-by: Jonas Xavier * rename no-version file to hide semantic version Signed-off-by: Jonas Xavier * drop integration tests and add pkg func for validation Signed-off-by: Jonas Xavier * python cataloger use global pkg validation Signed-off-by: Jonas Xavier * check for valid packages on deb/go/rpm catalogers Signed-off-by: Jonas Xavier * update rpm cataloger after rebase Signed-off-by: Jonas Xavier * nit with pointers Signed-off-by: Jonas Xavier * simpler use of package validation Signed-off-by: Jonas Xavier * remmove double pkg validations Signed-off-by: Jonas Xavier * rename func param to artifactsToExclude Signed-off-by: Jonas Xavier * add test for relationships and bug fix Signed-off-by: Jonas Xavier * feedback changes Signed-off-by: Jonas Xavier --- .../pkg/cataloger/common/generic_cataloger.go | 30 +++- .../common/generic_cataloger_test.go | 137 ++++++++++++++++-- .../cataloger/common/test-fixtures/empty.txt | 0 syft/pkg/cataloger/deb/parse_dpkg_status.go | 9 +- .../cataloger/deb/parse_dpkg_status_test.go | 6 +- .../image-dpkg/var/lib/dpkg/status | 2 + .../cataloger/deb/test-fixtures/status/empty | 1 + .../deb/test-fixtures/status/multiple | 4 + syft/pkg/cataloger/golang/parse_go_bin.go | 6 +- .../pkg/cataloger/golang/parse_go_bin_test.go | 32 ++++ .../pkg/cataloger/python/package_cataloger.go | 2 +- .../python/package_cataloger_test.go | 18 +++ .../test-fixtures/empty-1.0.0-py3.8.egg-info | 0 .../test-fixtures/no-version-py3.8.egg-info | 1 + syft/pkg/cataloger/rpmdb/parse_rpmdb.go | 8 +- syft/pkg/package.go | 11 ++ syft/pkg/package_test.go | 28 ++++ 17 files changed, 269 insertions(+), 26 deletions(-) create mode 100644 syft/pkg/cataloger/common/test-fixtures/empty.txt create mode 100644 syft/pkg/cataloger/deb/test-fixtures/status/empty create mode 100644 syft/pkg/cataloger/python/test-fixtures/empty-1.0.0-py3.8.egg-info create mode 100644 syft/pkg/cataloger/python/test-fixtures/no-version-py3.8.egg-info diff --git a/syft/pkg/cataloger/common/generic_cataloger.go b/syft/pkg/cataloger/common/generic_cataloger.go index 859b77ef3..5aa068114 100644 --- a/syft/pkg/cataloger/common/generic_cataloger.go +++ b/syft/pkg/cataloger/common/generic_cataloger.go @@ -56,18 +56,46 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, continue } + pkgsForRemoval := make(map[artifact.ID]struct{}) + var cleanedRelationships []artifact.Relationship for _, p := range discoveredPackages { p.FoundBy = c.upstreamCataloger p.Locations.Add(location) p.SetID() + // doing it here so all packages have an ID, + // IDs are later used to remove relationships + if !pkg.IsValid(p) { + pkgsForRemoval[p.ID()] = struct{}{} + continue + } + packages = append(packages, *p) } - relationships = append(relationships, discoveredRelationships...) + cleanedRelationships = removeRelationshipsWithArtifactIDs(pkgsForRemoval, discoveredRelationships) + relationships = append(relationships, cleanedRelationships...) } return packages, relationships, nil } +func removeRelationshipsWithArtifactIDs(artifactsToExclude map[artifact.ID]struct{}, relationships []artifact.Relationship) []artifact.Relationship { + if len(artifactsToExclude) == 0 || len(relationships) == 0 { + // no removal to do + return relationships + } + + var cleanedRelationships []artifact.Relationship + for _, r := range relationships { + _, removeTo := artifactsToExclude[r.To.ID()] + _, removaFrom := artifactsToExclude[r.From.ID()] + if !removeTo && !removaFrom { + cleanedRelationships = append(cleanedRelationships, r) + } + } + + return cleanedRelationships +} + // SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging func (c *GenericCataloger) selectFiles(resolver source.FilePathResolver) map[source.Location]ParserFn { var parserByLocation = make(map[source.Location]ParserFn) diff --git a/syft/pkg/cataloger/common/generic_cataloger_test.go b/syft/pkg/cataloger/common/generic_cataloger_test.go index 2e3a7234a..6e6a2c729 100644 --- a/syft/pkg/cataloger/common/generic_cataloger_test.go +++ b/syft/pkg/cataloger/common/generic_cataloger_test.go @@ -7,28 +7,31 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" ) -func parser(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { - contents, err := ioutil.ReadAll(reader) - if err != nil { - panic(err) - } - return []*pkg.Package{ - { - Name: string(contents), - }, - }, nil, nil -} - func TestGenericCataloger(t *testing.T) { + allParsedPathes := make(map[string]bool) + parser := func(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + allParsedPathes[path] = true + contents, err := ioutil.ReadAll(reader) + require.NoError(t, err) + + p := &pkg.Package{Name: string(contents)} + r := artifact.Relationship{From: p, To: p, + Type: artifact.ContainsRelationship, + } + + return []*pkg.Package{p}, []artifact.Relationship{r}, nil + } globParsers := map[string]ParserFn{ "**/a-path.txt": parser, + "**/empty.txt": parser, } pathParsers := map[string]ParserFn{ "test-fixtures/another-path.txt": parser, @@ -36,21 +39,27 @@ func TestGenericCataloger(t *testing.T) { } upstream := "some-other-cataloger" - expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"} + expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt", "test-fixtures/empty.txt"} resolver := source.NewMockResolverForPaths(expectedSelection...) cataloger := NewGenericCataloger(pathParsers, globParsers, upstream) + actualPkgs, relationships, err := cataloger.Catalog(resolver) + assert.NoError(t, err) + expectedPkgs := make(map[string]pkg.Package) for _, path := range expectedSelection { + require.True(t, allParsedPathes[path]) expectedPkgs[path] = pkg.Package{ FoundBy: upstream, Name: fmt.Sprintf("%s file contents!", path), } } - actualPkgs, _, err := cataloger.Catalog(resolver) - assert.NoError(t, err) - assert.Len(t, actualPkgs, len(expectedPkgs)) + assert.Len(t, allParsedPathes, len(expectedSelection)) + // empty.txt won't become a package + assert.Len(t, actualPkgs, len(expectedPkgs)-1) + // right now, a relationship is created for each package, but if the relationship includes an invalid package it should be dropped. + assert.Len(t, relationships, len(actualPkgs)) for _, p := range actualPkgs { ref := p.Locations.ToSlice()[0] @@ -69,3 +78,99 @@ func TestGenericCataloger(t *testing.T) { } } } + +func Test_removeRelationshipsWithArtifactIDs(t *testing.T) { + one := &pkg.Package{Name: "one", Version: "1.0"} + two := &pkg.Package{Name: "two", Version: "1.0"} + three := &pkg.Package{Name: "three", Version: "1.0"} + four := &pkg.Package{Name: "four", Version: "bla"} + five := &pkg.Package{Name: "five", Version: "1.0"} + + pkgs := make([]artifact.Identifiable, 0) + for _, p := range []*pkg.Package{one, two, three, four, five} { + // IDs are necessary for comparison + p.SetID() + pkgs = append(pkgs, p) + } + + type args struct { + remove map[artifact.ID]struct{} + relationships []artifact.Relationship + } + tests := []struct { + name string + args args + want []artifact.Relationship + }{ + { + name: "nothing-to-remove", + args: args{ + relationships: []artifact.Relationship{ + {From: one, To: two}, + }, + }, + want: []artifact.Relationship{ + {From: one, To: two}, + }, + }, + { + name: "remove-all-relationships", + args: args{ + remove: map[artifact.ID]struct{}{ + one.ID(): {}, + three.ID(): {}, + }, + relationships: []artifact.Relationship{ + {From: one, To: two}, + {From: two, To: three}, + {From: three, To: four}, + }, + }, + want: []artifact.Relationship(nil), + }, + { + name: "remove-half-of-relationships", + args: args{ + remove: map[artifact.ID]struct{}{ + one.ID(): {}, + }, + relationships: []artifact.Relationship{ + {From: one, To: two}, + {From: one, To: three}, + {From: two, To: three}, + {From: three, To: four}, + }, + }, + want: []artifact.Relationship{ + {From: two, To: three}, + {From: three, To: four}, + }, + }, + { + name: "remove-repeated-relationships", + args: args{ + remove: map[artifact.ID]struct{}{ + one.ID(): {}, + two.ID(): {}, + }, + relationships: []artifact.Relationship{ + {From: one, To: two}, + {From: one, To: three}, + {From: two, To: three}, + {From: two, To: three}, + {From: three, To: four}, + {From: four, To: five}, + }, + }, + want: []artifact.Relationship{ + {From: three, To: four}, + {From: four, To: five}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, removeRelationshipsWithArtifactIDs(tt.args.remove, tt.args.relationships), "removeRelationshipsWithArtifactIDs(%v, %v)", tt.args.remove, tt.args.relationships) + }) + } +} diff --git a/syft/pkg/cataloger/common/test-fixtures/empty.txt b/syft/pkg/cataloger/common/test-fixtures/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status.go b/syft/pkg/cataloger/deb/parse_dpkg_status.go index 4766a6060..3be240151 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_status.go @@ -20,8 +20,8 @@ var ( sourceRegexp = regexp.MustCompile(`(?P\S+)( \((?P.*)\))?`) ) -func newDpkgPackage(d pkg.DpkgMetadata) pkg.Package { - return pkg.Package{ +func newDpkgPackage(d pkg.DpkgMetadata) *pkg.Package { + return &pkg.Package{ Name: d.Package, Version: d.Version, Type: pkg.DebPkg, @@ -46,8 +46,9 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) { } } - if entry.Package != "" { - packages = append(packages, newDpkgPackage(entry)) + p := newDpkgPackage(entry) + if pkg.IsValid(p) { + packages = append(packages, *p) } } diff --git a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go b/syft/pkg/cataloger/deb/parse_dpkg_status_test.go index 0611b2c8e..8fa5b2169 100644 --- a/syft/pkg/cataloger/deb/parse_dpkg_status_test.go +++ b/syft/pkg/cataloger/deb/parse_dpkg_status_test.go @@ -121,6 +121,10 @@ func TestMultiplePackages(t *testing.T) { { name: "Test Multiple Package", expected: []pkg.DpkgMetadata{ + { + Package: "no-version", + Files: []pkg.DpkgFileRecord{}, + }, { Package: "tzdata", Version: "2020a-0+deb10u1", @@ -209,7 +213,7 @@ func TestMultiplePackages(t *testing.T) { t.Fatal("Unable to read file contents: ", err) } - if len(pkgs) != 2 { + if len(pkgs) != 3 { t.Fatalf("unexpected number of entries: %d", len(pkgs)) } diff --git a/syft/pkg/cataloger/deb/test-fixtures/image-dpkg/var/lib/dpkg/status b/syft/pkg/cataloger/deb/test-fixtures/image-dpkg/var/lib/dpkg/status index 692a28706..501e65e55 100644 --- a/syft/pkg/cataloger/deb/test-fixtures/image-dpkg/var/lib/dpkg/status +++ b/syft/pkg/cataloger/deb/test-fixtures/image-dpkg/var/lib/dpkg/status @@ -1,3 +1,5 @@ +Package: + Package: libpam-runtime Status: install ok installed Priority: required diff --git a/syft/pkg/cataloger/deb/test-fixtures/status/empty b/syft/pkg/cataloger/deb/test-fixtures/status/empty new file mode 100644 index 000000000..3a74c4a01 --- /dev/null +++ b/syft/pkg/cataloger/deb/test-fixtures/status/empty @@ -0,0 +1 @@ +Package: \ No newline at end of file diff --git a/syft/pkg/cataloger/deb/test-fixtures/status/multiple b/syft/pkg/cataloger/deb/test-fixtures/status/multiple index 633b362dc..956a1af3e 100644 --- a/syft/pkg/cataloger/deb/test-fixtures/status/multiple +++ b/syft/pkg/cataloger/deb/test-fixtures/status/multiple @@ -1,3 +1,7 @@ +Package: + +Package: no-version + Package: tzdata Status: install ok installed Priority: required diff --git a/syft/pkg/cataloger/golang/parse_go_bin.go b/syft/pkg/cataloger/golang/parse_go_bin.go index 3ec13dc28..20a7e9ca7 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin.go +++ b/syft/pkg/cataloger/golang/parse_go_bin.go @@ -172,8 +172,10 @@ func buildGoPkgInfo(location source.Location, mod *debug.BuildInfo, arch string) if dep == nil { continue } - - pkgs = append(pkgs, newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil)) + p := newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil) + if pkg.IsValid(&p) { + pkgs = append(pkgs, p) + } } // NOTE(jonasagx): this use happened originally while creating unit tests. It might never diff --git a/syft/pkg/cataloger/golang/parse_go_bin_test.go b/syft/pkg/cataloger/golang/parse_go_bin_test.go index 9fddb2050..483c0bedd 100644 --- a/syft/pkg/cataloger/golang/parse_go_bin_test.go +++ b/syft/pkg/cataloger/golang/parse_go_bin_test.go @@ -161,6 +161,38 @@ func TestBuildGoPkgInfo(t *testing.T) { mod: nil, expected: []pkg.Package(nil), }, + { + name: "package without name", + mod: &debug.BuildInfo{ + Deps: []*debug.Module{ + { + Path: "github.com/adrg/xdg", + }, + { + Path: "", + Version: "v0.2.1", + }, + }, + }, + expected: []pkg.Package{ + { + Name: "github.com/adrg/xdg", + FoundBy: catalogerName, + Language: pkg.Go, + Type: pkg.GoModulePkg, + Locations: source.NewLocationSet( + source.Location{ + Coordinates: source.Coordinates{ + RealPath: "/a-path", + FileSystemID: "layer-id", + }, + }, + ), + MetadataType: pkg.GolangBinMetadataType, + Metadata: pkg.GolangBinMetadata{}, + }, + }, + }, { name: "buildGoPkgInfo parses a blank mod and returns no packages", mod: &debug.BuildInfo{}, diff --git a/syft/pkg/cataloger/python/package_cataloger.go b/syft/pkg/cataloger/python/package_cataloger.go index a966e865a..d58fe7ccd 100644 --- a/syft/pkg/cataloger/python/package_cataloger.go +++ b/syft/pkg/cataloger/python/package_cataloger.go @@ -51,7 +51,7 @@ func (c *PackageCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, if err != nil { return nil, nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.RealPath, err) } - if p != nil { + if pkg.IsValid(p) { pkgs = append(pkgs, *p) } } diff --git a/syft/pkg/cataloger/python/package_cataloger_test.go b/syft/pkg/cataloger/python/package_cataloger_test.go index 7479f9ef3..fa6917ac3 100644 --- a/syft/pkg/cataloger/python/package_cataloger_test.go +++ b/syft/pkg/cataloger/python/package_cataloger_test.go @@ -14,6 +14,21 @@ func TestPythonPackageWheelCataloger(t *testing.T) { fixtures []string expectedPackage pkg.Package }{ + { + name: "egg-file-no-version", + fixtures: []string{"test-fixtures/no-version-py3.8.egg-info"}, + expectedPackage: pkg.Package{ + Name: "no-version", + Type: pkg.PythonPkg, + Language: pkg.Python, + FoundBy: "python-package-cataloger", + MetadataType: pkg.PythonPackageMetadataType, + Metadata: pkg.PythonPackageMetadata{ + Name: "no-version", + SitePackagesRootPath: "test-fixtures", + }, + }, + }, { name: "egg-info directory", fixtures: []string{ @@ -169,6 +184,9 @@ func TestIgnorePackage(t *testing.T) { { MetadataFixture: "test-fixtures/Python-2.7.egg-info", }, + { + MetadataFixture: "test-fixtures/empty-1.0.0-py3.8.egg-info", + }, } for _, test := range tests { diff --git a/syft/pkg/cataloger/python/test-fixtures/empty-1.0.0-py3.8.egg-info b/syft/pkg/cataloger/python/test-fixtures/empty-1.0.0-py3.8.egg-info new file mode 100644 index 000000000..e69de29bb diff --git a/syft/pkg/cataloger/python/test-fixtures/no-version-py3.8.egg-info b/syft/pkg/cataloger/python/test-fixtures/no-version-py3.8.egg-info new file mode 100644 index 000000000..085ae6e87 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/no-version-py3.8.egg-info @@ -0,0 +1 @@ +Name: no-version diff --git a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go index f09790f5c..bc3e72f51 100644 --- a/syft/pkg/cataloger/rpmdb/parse_rpmdb.go +++ b/syft/pkg/cataloger/rpmdb/parse_rpmdb.go @@ -43,13 +43,19 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re return nil, err } - allPkgs := make([]pkg.Package, 0) + var allPkgs []pkg.Package for _, entry := range pkgList { p, err := newPkg(resolver, dbLocation, entry) if err != nil { return nil, err } + + if !pkg.IsValid(p) { + continue + } + + p.SetID() allPkgs = append(allPkgs, *p) } diff --git a/syft/pkg/package.go b/syft/pkg/package.go index d39be9912..ccfbbb01f 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -68,3 +68,14 @@ func (p *Package) merge(other Package) error { } return nil } + +// IsValid checks whether a package has the minimum necessary info +// which is a non-empty name. +// The nil-check was added as a helper as often, in this code base, packages +// move between callers as pointers. +// CycloneDX and SPDX define Name as the minimum required info for a valid package: +// * https://spdx.github.io/spdx-spec/package-information/#73-package-version-field +// * https://cyclonedx.org/docs/1.4/json/#components_items_name +func IsValid(p *Package) bool { + return p != nil && p.Name != "" +} diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index bdbcee8f1..6e95e3896 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -442,3 +442,31 @@ func TestPackage_Merge(t *testing.T) { }) } } + +func TestIsValid(t *testing.T) { + cases := []struct { + name string + given *Package + want bool + }{ + { + name: "nil", + given: nil, + want: false, + }, + { + name: "has-name", + given: &Package{Name: "paul"}, + want: true, + }, + { + name: "has-no-name", + given: &Package{}, + want: false, + }, + } + + for _, c := range cases { + require.Equal(t, c.want, IsValid(c.given), "when package: %s", c.name) + } +}