mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 10:36:45 +01:00
Add filters to package cataloger (#1021)
* Add filters to package cataloger This PR adds filters so a package without name or version doesn't go in the list of all discovered packages. Integration and cli tests were added to validate the feature. Signed-off-by: Jonas Xavier <jonasx@anchore.com> * add nolint:funlen to cataloger/catalog.go Signed-off-by: Jonas Xavier <jonasx@anchore.com> * don't require package version Signed-off-by: Jonas Xavier <jonasx@anchore.com> * add package filtering to generic and python cataloger also removes cli tests in favor of integration and unit tests Signed-off-by: Jonas Xavier <jonasx@anchore.com> * drop nolint:funlen Signed-off-by: Jonas Xavier <jonasx@anchore.com> * check for no-removal operation Signed-off-by: Jonas Xavier <jonasx@anchore.com> * remove unused fixtures Signed-off-by: Jonas Xavier <jonasx@anchore.com> * rename no-version file to hide semantic version Signed-off-by: Jonas Xavier <jonasx@anchore.com> * drop integration tests and add pkg func for validation Signed-off-by: Jonas Xavier <jonasx@anchore.com> * python cataloger use global pkg validation Signed-off-by: Jonas Xavier <jonasx@anchore.com> * check for valid packages on deb/go/rpm catalogers Signed-off-by: Jonas Xavier <jonasx@anchore.com> * update rpm cataloger after rebase Signed-off-by: Jonas Xavier <jonasx@anchore.com> * nit with pointers Signed-off-by: Jonas Xavier <jonasx@anchore.com> * simpler use of package validation Signed-off-by: Jonas Xavier <jonasx@anchore.com> * remmove double pkg validations Signed-off-by: Jonas Xavier <jonasx@anchore.com> * rename func param to artifactsToExclude Signed-off-by: Jonas Xavier <jonasx@anchore.com> * add test for relationships and bug fix Signed-off-by: Jonas Xavier <jonasx@anchore.com> * feedback changes Signed-off-by: Jonas Xavier <jonasx@anchore.com>
This commit is contained in:
parent
82de24cf7c
commit
caff67289a
@ -56,18 +56,46 @@ func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
|
||||
continue
|
||||
}
|
||||
|
||||
pkgsForRemoval := make(map[artifact.ID]struct{})
|
||||
var cleanedRelationships []artifact.Relationship
|
||||
for _, p := range discoveredPackages {
|
||||
p.FoundBy = c.upstreamCataloger
|
||||
p.Locations.Add(location)
|
||||
p.SetID()
|
||||
// doing it here so all packages have an ID,
|
||||
// IDs are later used to remove relationships
|
||||
if !pkg.IsValid(p) {
|
||||
pkgsForRemoval[p.ID()] = struct{}{}
|
||||
continue
|
||||
}
|
||||
|
||||
packages = append(packages, *p)
|
||||
}
|
||||
|
||||
relationships = append(relationships, discoveredRelationships...)
|
||||
cleanedRelationships = removeRelationshipsWithArtifactIDs(pkgsForRemoval, discoveredRelationships)
|
||||
relationships = append(relationships, cleanedRelationships...)
|
||||
}
|
||||
return packages, relationships, nil
|
||||
}
|
||||
|
||||
func removeRelationshipsWithArtifactIDs(artifactsToExclude map[artifact.ID]struct{}, relationships []artifact.Relationship) []artifact.Relationship {
|
||||
if len(artifactsToExclude) == 0 || len(relationships) == 0 {
|
||||
// no removal to do
|
||||
return relationships
|
||||
}
|
||||
|
||||
var cleanedRelationships []artifact.Relationship
|
||||
for _, r := range relationships {
|
||||
_, removeTo := artifactsToExclude[r.To.ID()]
|
||||
_, removaFrom := artifactsToExclude[r.From.ID()]
|
||||
if !removeTo && !removaFrom {
|
||||
cleanedRelationships = append(cleanedRelationships, r)
|
||||
}
|
||||
}
|
||||
|
||||
return cleanedRelationships
|
||||
}
|
||||
|
||||
// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
|
||||
func (c *GenericCataloger) selectFiles(resolver source.FilePathResolver) map[source.Location]ParserFn {
|
||||
var parserByLocation = make(map[source.Location]ParserFn)
|
||||
|
||||
@ -7,28 +7,31 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
func parser(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
|
||||
contents, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return []*pkg.Package{
|
||||
{
|
||||
Name: string(contents),
|
||||
},
|
||||
}, nil, nil
|
||||
}
|
||||
|
||||
func TestGenericCataloger(t *testing.T) {
|
||||
allParsedPathes := make(map[string]bool)
|
||||
parser := func(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
|
||||
allParsedPathes[path] = true
|
||||
contents, err := ioutil.ReadAll(reader)
|
||||
require.NoError(t, err)
|
||||
|
||||
p := &pkg.Package{Name: string(contents)}
|
||||
r := artifact.Relationship{From: p, To: p,
|
||||
Type: artifact.ContainsRelationship,
|
||||
}
|
||||
|
||||
return []*pkg.Package{p}, []artifact.Relationship{r}, nil
|
||||
}
|
||||
|
||||
globParsers := map[string]ParserFn{
|
||||
"**/a-path.txt": parser,
|
||||
"**/empty.txt": parser,
|
||||
}
|
||||
pathParsers := map[string]ParserFn{
|
||||
"test-fixtures/another-path.txt": parser,
|
||||
@ -36,21 +39,27 @@ func TestGenericCataloger(t *testing.T) {
|
||||
}
|
||||
upstream := "some-other-cataloger"
|
||||
|
||||
expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}
|
||||
expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt", "test-fixtures/empty.txt"}
|
||||
resolver := source.NewMockResolverForPaths(expectedSelection...)
|
||||
cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)
|
||||
|
||||
actualPkgs, relationships, err := cataloger.Catalog(resolver)
|
||||
assert.NoError(t, err)
|
||||
|
||||
expectedPkgs := make(map[string]pkg.Package)
|
||||
for _, path := range expectedSelection {
|
||||
require.True(t, allParsedPathes[path])
|
||||
expectedPkgs[path] = pkg.Package{
|
||||
FoundBy: upstream,
|
||||
Name: fmt.Sprintf("%s file contents!", path),
|
||||
}
|
||||
}
|
||||
|
||||
actualPkgs, _, err := cataloger.Catalog(resolver)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, actualPkgs, len(expectedPkgs))
|
||||
assert.Len(t, allParsedPathes, len(expectedSelection))
|
||||
// empty.txt won't become a package
|
||||
assert.Len(t, actualPkgs, len(expectedPkgs)-1)
|
||||
// right now, a relationship is created for each package, but if the relationship includes an invalid package it should be dropped.
|
||||
assert.Len(t, relationships, len(actualPkgs))
|
||||
|
||||
for _, p := range actualPkgs {
|
||||
ref := p.Locations.ToSlice()[0]
|
||||
@ -69,3 +78,99 @@ func TestGenericCataloger(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Test_removeRelationshipsWithArtifactIDs(t *testing.T) {
|
||||
one := &pkg.Package{Name: "one", Version: "1.0"}
|
||||
two := &pkg.Package{Name: "two", Version: "1.0"}
|
||||
three := &pkg.Package{Name: "three", Version: "1.0"}
|
||||
four := &pkg.Package{Name: "four", Version: "bla"}
|
||||
five := &pkg.Package{Name: "five", Version: "1.0"}
|
||||
|
||||
pkgs := make([]artifact.Identifiable, 0)
|
||||
for _, p := range []*pkg.Package{one, two, three, four, five} {
|
||||
// IDs are necessary for comparison
|
||||
p.SetID()
|
||||
pkgs = append(pkgs, p)
|
||||
}
|
||||
|
||||
type args struct {
|
||||
remove map[artifact.ID]struct{}
|
||||
relationships []artifact.Relationship
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want []artifact.Relationship
|
||||
}{
|
||||
{
|
||||
name: "nothing-to-remove",
|
||||
args: args{
|
||||
relationships: []artifact.Relationship{
|
||||
{From: one, To: two},
|
||||
},
|
||||
},
|
||||
want: []artifact.Relationship{
|
||||
{From: one, To: two},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "remove-all-relationships",
|
||||
args: args{
|
||||
remove: map[artifact.ID]struct{}{
|
||||
one.ID(): {},
|
||||
three.ID(): {},
|
||||
},
|
||||
relationships: []artifact.Relationship{
|
||||
{From: one, To: two},
|
||||
{From: two, To: three},
|
||||
{From: three, To: four},
|
||||
},
|
||||
},
|
||||
want: []artifact.Relationship(nil),
|
||||
},
|
||||
{
|
||||
name: "remove-half-of-relationships",
|
||||
args: args{
|
||||
remove: map[artifact.ID]struct{}{
|
||||
one.ID(): {},
|
||||
},
|
||||
relationships: []artifact.Relationship{
|
||||
{From: one, To: two},
|
||||
{From: one, To: three},
|
||||
{From: two, To: three},
|
||||
{From: three, To: four},
|
||||
},
|
||||
},
|
||||
want: []artifact.Relationship{
|
||||
{From: two, To: three},
|
||||
{From: three, To: four},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "remove-repeated-relationships",
|
||||
args: args{
|
||||
remove: map[artifact.ID]struct{}{
|
||||
one.ID(): {},
|
||||
two.ID(): {},
|
||||
},
|
||||
relationships: []artifact.Relationship{
|
||||
{From: one, To: two},
|
||||
{From: one, To: three},
|
||||
{From: two, To: three},
|
||||
{From: two, To: three},
|
||||
{From: three, To: four},
|
||||
{From: four, To: five},
|
||||
},
|
||||
},
|
||||
want: []artifact.Relationship{
|
||||
{From: three, To: four},
|
||||
{From: four, To: five},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equalf(t, tt.want, removeRelationshipsWithArtifactIDs(tt.args.remove, tt.args.relationships), "removeRelationshipsWithArtifactIDs(%v, %v)", tt.args.remove, tt.args.relationships)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
0
syft/pkg/cataloger/common/test-fixtures/empty.txt
Normal file
0
syft/pkg/cataloger/common/test-fixtures/empty.txt
Normal file
@ -20,8 +20,8 @@ var (
|
||||
sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
|
||||
)
|
||||
|
||||
func newDpkgPackage(d pkg.DpkgMetadata) pkg.Package {
|
||||
return pkg.Package{
|
||||
func newDpkgPackage(d pkg.DpkgMetadata) *pkg.Package {
|
||||
return &pkg.Package{
|
||||
Name: d.Package,
|
||||
Version: d.Version,
|
||||
Type: pkg.DebPkg,
|
||||
@ -46,8 +46,9 @@ func parseDpkgStatus(reader io.Reader) ([]pkg.Package, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if entry.Package != "" {
|
||||
packages = append(packages, newDpkgPackage(entry))
|
||||
p := newDpkgPackage(entry)
|
||||
if pkg.IsValid(p) {
|
||||
packages = append(packages, *p)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -121,6 +121,10 @@ func TestMultiplePackages(t *testing.T) {
|
||||
{
|
||||
name: "Test Multiple Package",
|
||||
expected: []pkg.DpkgMetadata{
|
||||
{
|
||||
Package: "no-version",
|
||||
Files: []pkg.DpkgFileRecord{},
|
||||
},
|
||||
{
|
||||
Package: "tzdata",
|
||||
Version: "2020a-0+deb10u1",
|
||||
@ -209,7 +213,7 @@ func TestMultiplePackages(t *testing.T) {
|
||||
t.Fatal("Unable to read file contents: ", err)
|
||||
}
|
||||
|
||||
if len(pkgs) != 2 {
|
||||
if len(pkgs) != 3 {
|
||||
t.Fatalf("unexpected number of entries: %d", len(pkgs))
|
||||
}
|
||||
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
Package:
|
||||
|
||||
Package: libpam-runtime
|
||||
Status: install ok installed
|
||||
Priority: required
|
||||
|
||||
1
syft/pkg/cataloger/deb/test-fixtures/status/empty
Normal file
1
syft/pkg/cataloger/deb/test-fixtures/status/empty
Normal file
@ -0,0 +1 @@
|
||||
Package:
|
||||
@ -1,3 +1,7 @@
|
||||
Package:
|
||||
|
||||
Package: no-version
|
||||
|
||||
Package: tzdata
|
||||
Status: install ok installed
|
||||
Priority: required
|
||||
|
||||
@ -172,8 +172,10 @@ func buildGoPkgInfo(location source.Location, mod *debug.BuildInfo, arch string)
|
||||
if dep == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
pkgs = append(pkgs, newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil))
|
||||
p := newGoBinaryPackage(dep, mod.GoVersion, arch, location, nil)
|
||||
if pkg.IsValid(&p) {
|
||||
pkgs = append(pkgs, p)
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(jonasagx): this use happened originally while creating unit tests. It might never
|
||||
|
||||
@ -161,6 +161,38 @@ func TestBuildGoPkgInfo(t *testing.T) {
|
||||
mod: nil,
|
||||
expected: []pkg.Package(nil),
|
||||
},
|
||||
{
|
||||
name: "package without name",
|
||||
mod: &debug.BuildInfo{
|
||||
Deps: []*debug.Module{
|
||||
{
|
||||
Path: "github.com/adrg/xdg",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Version: "v0.2.1",
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: []pkg.Package{
|
||||
{
|
||||
Name: "github.com/adrg/xdg",
|
||||
FoundBy: catalogerName,
|
||||
Language: pkg.Go,
|
||||
Type: pkg.GoModulePkg,
|
||||
Locations: source.NewLocationSet(
|
||||
source.Location{
|
||||
Coordinates: source.Coordinates{
|
||||
RealPath: "/a-path",
|
||||
FileSystemID: "layer-id",
|
||||
},
|
||||
},
|
||||
),
|
||||
MetadataType: pkg.GolangBinMetadataType,
|
||||
Metadata: pkg.GolangBinMetadata{},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "buildGoPkgInfo parses a blank mod and returns no packages",
|
||||
mod: &debug.BuildInfo{},
|
||||
|
||||
@ -51,7 +51,7 @@ func (c *PackageCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package,
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.RealPath, err)
|
||||
}
|
||||
if p != nil {
|
||||
if pkg.IsValid(p) {
|
||||
pkgs = append(pkgs, *p)
|
||||
}
|
||||
}
|
||||
|
||||
@ -14,6 +14,21 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
|
||||
fixtures []string
|
||||
expectedPackage pkg.Package
|
||||
}{
|
||||
{
|
||||
name: "egg-file-no-version",
|
||||
fixtures: []string{"test-fixtures/no-version-py3.8.egg-info"},
|
||||
expectedPackage: pkg.Package{
|
||||
Name: "no-version",
|
||||
Type: pkg.PythonPkg,
|
||||
Language: pkg.Python,
|
||||
FoundBy: "python-package-cataloger",
|
||||
MetadataType: pkg.PythonPackageMetadataType,
|
||||
Metadata: pkg.PythonPackageMetadata{
|
||||
Name: "no-version",
|
||||
SitePackagesRootPath: "test-fixtures",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "egg-info directory",
|
||||
fixtures: []string{
|
||||
@ -169,6 +184,9 @@ func TestIgnorePackage(t *testing.T) {
|
||||
{
|
||||
MetadataFixture: "test-fixtures/Python-2.7.egg-info",
|
||||
},
|
||||
{
|
||||
MetadataFixture: "test-fixtures/empty-1.0.0-py3.8.egg-info",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
@ -0,0 +1 @@
|
||||
Name: no-version
|
||||
@ -43,13 +43,19 @@ func parseRpmDB(resolver source.FilePathResolver, dbLocation source.Location, re
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allPkgs := make([]pkg.Package, 0)
|
||||
var allPkgs []pkg.Package
|
||||
|
||||
for _, entry := range pkgList {
|
||||
p, err := newPkg(resolver, dbLocation, entry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !pkg.IsValid(p) {
|
||||
continue
|
||||
}
|
||||
|
||||
p.SetID()
|
||||
allPkgs = append(allPkgs, *p)
|
||||
}
|
||||
|
||||
|
||||
@ -68,3 +68,14 @@ func (p *Package) merge(other Package) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsValid checks whether a package has the minimum necessary info
|
||||
// which is a non-empty name.
|
||||
// The nil-check was added as a helper as often, in this code base, packages
|
||||
// move between callers as pointers.
|
||||
// CycloneDX and SPDX define Name as the minimum required info for a valid package:
|
||||
// * https://spdx.github.io/spdx-spec/package-information/#73-package-version-field
|
||||
// * https://cyclonedx.org/docs/1.4/json/#components_items_name
|
||||
func IsValid(p *Package) bool {
|
||||
return p != nil && p.Name != ""
|
||||
}
|
||||
|
||||
@ -442,3 +442,31 @@ func TestPackage_Merge(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsValid(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
given *Package
|
||||
want bool
|
||||
}{
|
||||
{
|
||||
name: "nil",
|
||||
given: nil,
|
||||
want: false,
|
||||
},
|
||||
{
|
||||
name: "has-name",
|
||||
given: &Package{Name: "paul"},
|
||||
want: true,
|
||||
},
|
||||
{
|
||||
name: "has-no-name",
|
||||
given: &Package{},
|
||||
want: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
require.Equal(t, c.want, IsValid(c.given), "when package: %s", c.name)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user