Add filters to package cataloger (#1021)

* Add filters to package cataloger

This PR adds filters so a package without name or version doesn't go in
the list of all discovered packages.

Integration and cli tests were added to validate the feature.

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* add nolint:funlen to cataloger/catalog.go

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* don't require package version

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* add package filtering to generic and python cataloger

also removes cli tests in favor of integration and unit tests

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* drop nolint:funlen

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* check for no-removal operation

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* remove unused fixtures

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* rename no-version file to hide semantic version

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* drop integration tests and add pkg func for validation

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* python cataloger use global pkg validation

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* check for valid packages on deb/go/rpm catalogers

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* update rpm cataloger after rebase

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* nit with pointers

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* simpler use of package validation

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* remmove double pkg validations

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* rename func param to artifactsToExclude

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* add test for relationships and bug fix

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* feedback changes

Signed-off-by: Jonas Xavier <jonasx@anchore.com>
This commit is contained in:
Jonas Xavier 2022-06-03 10:17:43 -07:00 committed by GitHub
parent 82de24cf7c
commit caff67289a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 269 additions and 26 deletions

View File

@ -56,18 +56,46 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
continue
}
pkgsForRemoval := make(map[artifact.ID]struct{})
var cleanedRelationships []artifact.Relationship
for _, p := range discoveredPackages {
p.FoundBy = c.upstreamCataloger
p.Locations.Add(location)
p.SetID()
// doing it here so all packages have an ID,
// IDs are later used to remove relationships
if !pkg.IsValid(p) {
pkgsForRemoval[p.ID()] = struct{}{}
continue
}
packages = append(packages, *p)
}
relationships = append(relationships, discoveredRelationships...)
cleanedRelationships = removeRelationshipsWithArtifactIDs(pkgsForRemoval, discoveredRelationships)
relationships = append(relationships, cleanedRelationships...)
}
return packages, relationships, nil
}
func removeRelationshipsWithArtifactIDs(artifactsToExclude map[artifact.ID]struct{}, relationships []artifact.Relationship) []artifact.Relationship {
if len(artifactsToExclude) == 0 || len(relationships) == 0 {
// no removal to do
return relationships
}
var cleanedRelationships []artifact.Relationship
for _, r := range relationships {
_, removeTo := artifactsToExclude[r.To.ID()]
_, removaFrom := artifactsToExclude[r.From.ID()]
if !removeTo && !removaFrom {
cleanedRelationships = append(cleanedRelationships, r)
}
}
return cleanedRelationships
}
// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
func (c *GenericCataloger) selectFiles(resolver source.FilePathResolver) map[source.Location]ParserFn {
var parserByLocation = make(map[source.Location]ParserFn)

View File

@ -7,28 +7,31 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
func parser(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
contents, err := ioutil.ReadAll(reader)
if err != nil {
panic(err)
}
return []*pkg.Package{
{
Name: string(contents),
},
}, nil, nil
}
func TestGenericCataloger(t *testing.T) {
allParsedPathes := make(map[string]bool)
parser := func(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
allParsedPathes[path] = true
contents, err := ioutil.ReadAll(reader)
require.NoError(t, err)
p := &pkg.Package{Name: string(contents)}
r := artifact.Relationship{From: p, To: p,
Type: artifact.ContainsRelationship,
}
return []*pkg.Package{p}, []artifact.Relationship{r}, nil
}
globParsers := map[string]ParserFn{
"**/a-path.txt": parser,
"**/empty.txt": parser,
}
pathParsers := map[string]ParserFn{
"test-fixtures/another-path.txt": parser,
@ -36,21 +39,27 @@ func TestGenericCataloger(t *testing.T) {
}
upstream := "some-other-cataloger"
expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}
expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt", "test-fixtures/empty.txt"}
resolver := source.NewMockResolverForPaths(expectedSelection...)
cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)
actualPkgs, relationships, err := cataloger.Catalog(resolver)
assert.NoError(t, err)
expectedPkgs := make(map[string]pkg.Package)
for _, path := range expectedSelection {
require.True(t, allParsedPathes[path])
expectedPkgs[path] = pkg.Package{
FoundBy: upstream,
Name: fmt.Sprintf("%s file contents!", path),
}
}
actualPkgs, _, err := cataloger.Catalog(resolver)
assert.NoError(t, err)
assert.Len(t, actualPkgs, len(expectedPkgs))
assert.Len(t, allParsedPathes, len(expectedSelection))
// empty.txt won't become a package
assert.Len(t, actualPkgs, len(expectedPkgs)-1)
// right now, a relationship is created for each package, but if the relationship includes an invalid package it should be dropped.
assert.Len(t, relationships, len(actualPkgs))
for _, p := range actualPkgs {
ref := p.Locations.ToSlice()[0]
@ -69,3 +78,99 @@ func TestGenericCataloger(t *testing.T) {
}
}
}
func Test_removeRelationshipsWithArtifactIDs(t *testing.T) {
one := &pkg.Package{Name: "one", Version: "1.0"}
two := &pkg.Package{Name: "two", Version: "1.0"}
three := &pkg.Package{Name: "three", Version: "1.0"}
four := &pkg.Package{Name: "four", Version: "bla"}
five := &pkg.Package{Name: "five", Version: "1.0"}
pkgs := make([]artifact.Identifiable, 0)
for _, p := range []*pkg.Package{one, two, three, four, five} {
// IDs are necessary for comparison
p.SetID()
pkgs = append(pkgs, p)
}
type args struct {
remove map[artifact.ID]struct{}
relationships []artifact.Relationship
}
tests := []struct {
name string
args args
want []artifact.Relationship
}{
{
name: "nothing-to-remove",
args: args{
relationships: []artifact.Relationship{
{From: one, To: two},
},
},
want: []artifact.Relationship{
{From: one, To: two},
},
},
{
name: "remove-all-relationships",
args: args{
remove: map[artifact.ID]struct{}{
one.ID(): {},
three.ID(): {},
},
relationships: []artifact.Relationship{
{From: one, To: two},
{From: two, To: three},
{From: three, To: four},
},
},
want: []artifact.Relationship(nil),
},
{
name: "remove-half-of-relationships",
args: args{
remove: map[artifact.ID]struct{}{
one.ID(): {},
},
relationships: []artifact.Relationship{
{From: one, To: two},
{From: one, To: three},
{From: two, To: three},
{From: three, To: four},
},
},
want: []artifact.Relationship{
{From: two, To: three},
{From: three, To: four},
},
},
{
name: "remove-repeated-relationships",
args: args{
remove: map[artifact.ID]struct{}{
one.ID(): {},
two.ID(): {},
},
relationships: []artifact.Relationship{
{From: one, To: two},
{From: one, To: three},
{From: two, To: three},
{From: two, To: three},
{From: three, To: four},
{From: four, To: five},
},
},
want: []artifact.Relationship{
{From: three, To: four},
{From: four, To: five},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, removeRelationshipsWithArtifactIDs(tt.args.remove, tt.args.relationships), "removeRelationshipsWithArtifactIDs(%v, %v)", tt.args.remove, tt.args.relationships)
})
}
}

View File

@ -20,8 +20,8 @@ var (
sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
)
func newDpkgPackage(d pkg.DpkgMetadata) pkg.Package {
return pkg.Package{
func newDpkgPackage(d pkg.DpkgMetadata) *pkg.Package {
return &pkg.Package{
Name: d.Package,
Version: d.Version,
Type: pkg.DebPkg,
@ -46,8 +46,9 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) {
}
}
if entry.Package != "" {
packages = append(packages, newDpkgPackage(entry))
p := newDpkgPackage(entry)
if pkg.IsValid(p) {
packages = append(packages, *p)
}
}

View File

@ -121,6 +121,10 @@ func TestMultiplePackages(t *testing.T) {
{
name: "Test Multiple Package",
expected: []pkg.DpkgMetadata{
{
Package: "no-version",
Files: []pkg.DpkgFileRecord{},
},
{
Package: "tzdata",
Version: "2020a-0+deb10u1",
@ -209,7 +213,7 @@ func TestMultiplePackages(t *testing.T) {
t.Fatal("Unable to read file contents: ", err)
}
if len(pkgs) != 2 {
if len(pkgs) != 3 {
t.Fatalf("unexpected number of entries: %d", len(pkgs))
}

View File

@ -1,3 +1,5 @@
Package:
Package: libpam-runtime
Status: install ok installed
Priority: required

View File

@ -0,0 +1 @@
Package:

View File

@ -1,3 +1,7 @@
Package:
Package: no-version
Package: tzdata
Status: install ok installed
Priority: required

View File

@ -172,8 +172,10 @@ func buildGoPkgInfo(location source.Location, mod *debug.BuildInfo, arch string)
if dep == nil {
continue
}
pkgs = append(pkgs, newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil))
p := newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil)
if pkg.IsValid(&p) {
pkgs = append(pkgs, p)
}
}
// NOTE(jonasagx): this use happened originally while creating unit tests. It might never

View File

@ -161,6 +161,38 @@ func TestBuildGoPkgInfo(t *testing.T) {
mod: nil,
expected: []pkg.Package(nil),
},
{
name: "package without name",
mod: &debug.BuildInfo{
Deps: []*debug.Module{
{
Path: "github.com/adrg/xdg",
},
{
Path: "",
Version: "v0.2.1",
},
},
},
expected: []pkg.Package{
{
Name: "github.com/adrg/xdg",
FoundBy: catalogerName,
Language: pkg.Go,
Type: pkg.GoModulePkg,
Locations: source.NewLocationSet(
source.Location{
Coordinates: source.Coordinates{
RealPath: "/a-path",
FileSystemID: "layer-id",
},
},
),
MetadataType: pkg.GolangBinMetadataType,
Metadata: pkg.GolangBinMetadata{},
},
},
},
{
name: "buildGoPkgInfo parses a blank mod and returns no packages",
mod: &debug.BuildInfo{},

View File

@ -51,7 +51,7 @@ func (c *PackageCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
if err != nil {
return nil, nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.RealPath, err)
}
if p != nil {
if pkg.IsValid(p) {
pkgs = append(pkgs, *p)
}
}

View File

@ -14,6 +14,21 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
fixtures []string
expectedPackage pkg.Package
}{
{
name: "egg-file-no-version",
fixtures: []string{"test-fixtures/no-version-py3.8.egg-info"},
expectedPackage: pkg.Package{
Name: "no-version",
Type: pkg.PythonPkg,
Language: pkg.Python,
FoundBy: "python-package-cataloger",
MetadataType: pkg.PythonPackageMetadataType,
Metadata: pkg.PythonPackageMetadata{
Name: "no-version",
SitePackagesRootPath: "test-fixtures",
},
},
},
{
name: "egg-info directory",
fixtures: []string{
@ -169,6 +184,9 @@ func TestIgnorePackage(t *testing.T) {
{
MetadataFixture: "test-fixtures/Python-2.7.egg-info",
},
{
MetadataFixture: "test-fixtures/empty-1.0.0-py3.8.egg-info",
},
}
for _, test := range tests {

View File

@ -0,0 +1 @@
Name: no-version

View File

@ -43,13 +43,19 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re
return nil, err
}
allPkgs := make([]pkg.Package, 0)
var allPkgs []pkg.Package
for _, entry := range pkgList {
p, err := newPkg(resolver, dbLocation, entry)
if err != nil {
return nil, err
}
if !pkg.IsValid(p) {
continue
}
p.SetID()
allPkgs = append(allPkgs, *p)
}

View File

@ -68,3 +68,14 @@ func (p *Package) merge(other Package) error {
}
return nil
}
// IsValid checks whether a package has the minimum necessary info
// which is a non-empty name.
// The nil-check was added as a helper as often, in this code base, packages
// move between callers as pointers.
// CycloneDX and SPDX define Name as the minimum required info for a valid package:
// * https://spdx.github.io/spdx-spec/package-information/#73-package-version-field
// * https://cyclonedx.org/docs/1.4/json/#components_items_name
func IsValid(p *Package) bool {
return p != nil && p.Name != ""
}

View File

@ -442,3 +442,31 @@ func TestPackage_Merge(t *testing.T) {
})
}
}
func TestIsValid(t *testing.T) {
cases := []struct {
name string
given *Package
want bool
}{
{
name: "nil",
given: nil,
want: false,
},
{
name: "has-name",
given: &Package{Name: "paul"},
want: true,
},
{
name: "has-no-name",
given: &Package{},
want: false,
},
}
for _, c := range cases {
require.Equal(t, c.want, IsValid(c.given), "when package: %s", c.name)
}
}