931: binary cataloger exclusion defaults for ownership by overlap (#1948)

Fixes #931

PR #1948 introduces a new implicit exclusion for binary packages that overlap by file ownership and have certain characteristics:

1) the relationship between packages is OwnershipByFileOverlap
2) the parent package is an "os" package - see changelog for included catalogers
3) the child is a synthetic package generated by the binary cataloger - see changelog for included catalogers
4) the package names are identical

---------

Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Christopher Angelo Phillips 2023-08-08 13:00:52 -04:00 committed by GitHub
parent 2fc65094b7
commit 466da7cbda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 295 additions and 109 deletions

View File

@ -482,6 +482,10 @@ default-image-pull-source: ""
# - "./out/**/*.json" # - "./out/**/*.json"
exclude: [] exclude: []
# allows users to exclude synthetic binary packages from the sbom
# these packages are removed if an overlap with a non-synthetic package is found
exclude-binary-overlap-by-ownership: true
# os and/or architecture to use when referencing container images (e.g. "windows/armv6" or "arm64") # os and/or architecture to use when referencing container images (e.g. "windows/armv6" or "arm64")
# same as --platform; SYFT_PLATFORM env var # same as --platform; SYFT_PLATFORM env var
platform: "" platform: ""

View File

@ -67,6 +67,7 @@ type Application struct {
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths
ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files
} }
func (cfg Application) ToCatalogerConfig() cataloger.Config { func (cfg Application) ToCatalogerConfig() cataloger.Config {
@ -78,6 +79,7 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config {
}, },
Catalogers: cfg.Catalogers, Catalogers: cfg.Catalogers,
Parallelism: cfg.Parallelism, Parallelism: cfg.Parallelism,
ExcludeBinaryOverlapByOwnership: cfg.ExcludeBinaryOverlapByOwnership,
Golang: golangCataloger.NewGoCatalogerOpts(). Golang: golangCataloger.NewGoCatalogerOpts().
WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses). WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses).
WithLocalModCacheDir(cfg.Golang.LocalModCacheDir). WithLocalModCacheDir(cfg.Golang.LocalModCacheDir).
@ -221,6 +223,7 @@ func loadDefaultValues(v *viper.Viper) {
v.SetDefault("catalogers", nil) v.SetDefault("catalogers", nil)
v.SetDefault("parallelism", 1) v.SetDefault("parallelism", 1)
v.SetDefault("default-image-pull-source", "") v.SetDefault("default-image-pull-source", "")
v.SetDefault("exclude-binary-overlap-by-ownership", true)
// for each field in the configuration struct, see if the field implements the defaultValueLoader interface and invoke it if it does // for each field in the configuration struct, see if the field implements the defaultValueLoader interface and invoke it if it does
value := reflect.ValueOf(Application{}) value := reflect.ValueOf(Application{})

View File

@ -74,11 +74,34 @@ func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection,
catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...) catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)
relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...) // apply exclusions to the package catalog
// default config value for this is true
// https://github.com/anchore/syft/issues/931
if cfg.ExcludeBinaryOverlapByOwnership {
for _, r := range relationships {
if cataloger.Exclude(r, catalog) {
catalog.Delete(r.To.ID())
relationships = removeRelationshipsByID(relationships, r.To.ID())
}
}
}
// no need to consider source relationships for os -> binary exclusions
relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
return catalog, relationships, release, err return catalog, relationships, release, err
} }
// removeRelationshipsByID returns the relationships whose From and To artifacts
// both have an ID different from id.
//
// The result is built in a freshly allocated slice instead of filtering in place.
// CatalogPackages calls this helper while it is ranging over the very same slice;
// compacting the shared backing array would shift elements underneath that loop,
// so some relationships would be skipped and others visited more than once.
func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
	filtered := make([]artifact.Relationship, 0, len(relationships))
	for _, r := range relationships {
		// drop any relationship that touches the removed artifact on either side
		if r.To.ID() == id || r.From.ID() == id {
			continue
		}
		filtered = append(filtered, r)
	}
	return filtered
}
func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship { func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method? relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
for p := range c.Enumerate() { for p := range c.Enumerate() {

View File

@ -5,9 +5,9 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
const catalogerName = "alpmdb-cataloger" const CatalogerName = "alpmdb-cataloger"
func NewAlpmdbCataloger() *generic.Cataloger { func NewAlpmdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName). return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob) WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob)
} }

View File

@ -8,10 +8,10 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
const catalogerName = "apkdb-cataloger" const CatalogerName = "apkdb-cataloger"
// NewApkdbCataloger returns a new Alpine DB cataloger object. // NewApkdbCataloger returns a new Alpine DB cataloger object.
func NewApkdbCataloger() *generic.Cataloger { func NewApkdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName). return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseApkDB, pkg.ApkDBGlob) WithParserByGlobs(parseApkDB, pkg.ApkDBGlob)
} }

View File

@ -7,7 +7,7 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
const catalogerName = "binary-cataloger" const CatalogerName = "binary-cataloger"
func NewCataloger() *Cataloger { func NewCataloger() *Cataloger {
return &Cataloger{} return &Cataloger{}
@ -22,7 +22,7 @@ type Cataloger struct{}
// Name returns a string that uniquely describes the Cataloger // Name returns a string that uniquely describes the Cataloger
func (c Cataloger) Name() string { func (c Cataloger) Name() string {
return catalogerName return CatalogerName
} }
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages // Catalog is given an object to resolve file references and content, this function returns any discovered Packages

View File

@ -31,7 +31,7 @@ func newPackage(classifier classifier, location file.Location, matchMetadata map
), ),
Type: pkg.BinaryPkg, Type: pkg.BinaryPkg,
CPEs: cpes, CPEs: cpes,
FoundBy: catalogerName, FoundBy: CatalogerName,
MetadataType: pkg.BinaryMetadataType, MetadataType: pkg.BinaryMetadataType,
Metadata: pkg.BinaryMetadata{ Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{ Matches: []pkg.ClassifierMatch{

View File

@ -8,7 +8,6 @@ import (
) )
// TODO: these field naming vs helper function naming schemes are inconsistent. // TODO: these field naming vs helper function naming schemes are inconsistent.
type Config struct { type Config struct {
Search SearchConfig Search SearchConfig
Golang golang.GoCatalogerOpts Golang golang.GoCatalogerOpts
@ -16,15 +15,7 @@ type Config struct {
Python python.CatalogerConfig Python python.CatalogerConfig
Catalogers []string Catalogers []string
Parallelism int Parallelism int
} ExcludeBinaryOverlapByOwnership bool
func DefaultConfig() Config {
return Config{
Search: DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
}
} }
func (c Config) Java() java.Config { func (c Config) Java() java.Config {

View File

@ -7,11 +7,11 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
const catalogerName = "dpkgdb-cataloger" const CatalogerName = "dpkgdb-cataloger"
// NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files. // NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files.
func NewDpkgdbCataloger() *generic.Cataloger { func NewDpkgdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName). return generic.NewCataloger(CatalogerName).
// note: these globs have been intentionally split up in order to improve search performance, // note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status") WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status")

View File

@ -12,7 +12,7 @@ import (
) )
const ( const (
catalogerName = "nix-store-cataloger" CatalogerName = "nix-store-cataloger"
nixStoreGlob = "**/nix/store/*" nixStoreGlob = "**/nix/store/*"
) )
@ -24,7 +24,7 @@ func NewStoreCataloger() *StoreCataloger {
} }
func (c *StoreCataloger) Name() string { func (c *StoreCataloger) Name() string {
return catalogerName return CatalogerName
} }
func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {

View File

@ -24,7 +24,7 @@ func TestCataloger_Catalog(t *testing.T) {
Version: "2.34-210", Version: "2.34-210",
PURL: "pkg:nix/glibc@2.34-210?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga", PURL: "pkg:nix/glibc@2.34-210?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga",
Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")), Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")),
FoundBy: catalogerName, FoundBy: CatalogerName,
Type: pkg.NixPkg, Type: pkg.NixPkg,
MetadataType: pkg.NixStoreMetadataType, MetadataType: pkg.NixStoreMetadataType,
Metadata: pkg.NixStoreMetadata{ Metadata: pkg.NixStoreMetadata{

View File

@ -10,7 +10,7 @@ func newNixStorePackage(storePath nixStorePath, locations ...file.Location) pkg.
p := pkg.Package{ p := pkg.Package{
Name: storePath.name, Name: storePath.name,
Version: storePath.version, Version: storePath.version,
FoundBy: catalogerName, FoundBy: CatalogerName,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
Type: pkg.NixPkg, Type: pkg.NixPkg,
PURL: packageURL(storePath), PURL: packageURL(storePath),

View File

@ -0,0 +1,55 @@
package cataloger
import (
"golang.org/x/exp/slices"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
"github.com/anchore/syft/syft/pkg/cataloger/nix"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
)
var (
// osCatalogerTypes lists the cataloger names that Exclude treats as "os" package
// catalogers; a relationship parent must have been found by one of these.
osCatalogerTypes = []string{
apkdb.CatalogerName,
alpm.CatalogerName,
deb.CatalogerName,
nix.CatalogerName,
rpm.DBCatalogerName,
rpm.FileCatalogerName,
}
// binaryCatalogerTypes lists the cataloger names whose packages Exclude treats as
// synthetic binary packages; a relationship child must have been found by one of these.
binaryCatalogerTypes = []string{binary.CatalogerName}
)
// Exclude reports whether the child (r.To) package of the given relationship should be
// removed from the collection. It returns true only when all of the following hold:
//  1. the relationship type is OwnershipByFileOverlap
//  2. the parent (r.From) is present in c and was found by an "os" cataloger (see osCatalogerTypes)
//  3. the child (r.To) is present in c and was found by the binary cataloger (see binaryCatalogerTypes)
//
// Note: package names are NOT compared; an os package that owns the files of a
// differently named binary package still causes that binary package to be excluded.
//
// This exclude was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func Exclude(r artifact.Relationship, c *pkg.Collection) bool {
	if r.Type != artifact.OwnershipByFileOverlapRelationship {
		return false
	}

	// the owning side must be a known os package
	owner := c.Package(r.From.ID())
	if owner == nil || !slices.Contains(osCatalogerTypes, owner.FoundBy) {
		return false
	}

	// the owned side must be a synthetic package from the binary cataloger
	owned := c.Package(r.To.ID())
	return owned != nil && slices.Contains(binaryCatalogerTypes, owned.FoundBy)
}

View File

@ -0,0 +1,78 @@
package cataloger
import (
"testing"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
)
// TestExclude verifies, for each kind of parent/child cataloger pairing, whether
// Exclude reports the child package of an ownership-by-file-overlap relationship
// as excludable.
func TestExclude(t *testing.T) {
	packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg, FoundBy: apkdb.CatalogerName}
	packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg, FoundBy: "language-cataloger"}
	packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
	packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
	// IDs must be set before the packages are copied into relationships and collections below
	for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD} {
		p.SetID()
	}

	tests := []struct {
		name          string
		relationship  artifact.Relationship
		packages      *pkg.Collection
		shouldExclude bool
	}{
		{
			name: "no exclusions from os -> python",
			relationship: artifact.Relationship{
				Type: artifact.OwnershipByFileOverlapRelationship,
				From: packageA,
				To:   packageB,
			},
			packages:      pkg.NewCollection(packageA, packageB),
			shouldExclude: false,
		},
		{
			name: "exclusions from os -> binary",
			relationship: artifact.Relationship{
				Type: artifact.OwnershipByFileOverlapRelationship,
				From: packageA,
				To:   packageC,
			},
			packages:      pkg.NewCollection(packageA, packageC),
			shouldExclude: true,
		},
		{
			name: "no exclusions from python -> binary",
			relationship: artifact.Relationship{
				Type: artifact.OwnershipByFileOverlapRelationship,
				From: packageB,
				To:   packageC,
			},
			packages:      pkg.NewCollection(packageB, packageC),
			shouldExclude: false,
		},
		{
			// Exclude only inspects the relationship type and each package's FoundBy;
			// it does not compare package names, so a differently named binary
			// package owned by an os package is still excluded.
			name: "exclusions from os -> binary with different package names",
			relationship: artifact.Relationship{
				Type: artifact.OwnershipByFileOverlapRelationship,
				From: packageA,
				To:   packageD,
			},
			packages:      pkg.NewCollection(packageA, packageD),
			shouldExclude: true,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// compare in both directions so that cases expecting "false" are verified too
			got := Exclude(test.relationship, test.packages)
			if got != test.shouldExclude {
				t.Errorf("Exclude() = %t, want %t for relationship %+v", got, test.shouldExclude, test.relationship)
			}
		})
	}
}

View File

@ -11,6 +11,11 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
const (
// DBCatalogerName is the name given to the cataloger returned by NewRpmDBCataloger.
DBCatalogerName = "rpm-db-cataloger"
// FileCatalogerName is the name given to the cataloger returned by NewFileCataloger.
FileCatalogerName = "rpm-file-cataloger"
)
// NewRpmDBCataloger returns a new RPM DB cataloger object. // NewRpmDBCataloger returns a new RPM DB cataloger object.
func NewRpmDBCataloger() *generic.Cataloger { func NewRpmDBCataloger() *generic.Cataloger {
// check if a sqlite driver is available // check if a sqlite driver is available
@ -18,14 +23,14 @@ func NewRpmDBCataloger() *generic.Cataloger {
log.Warnf("sqlite driver is not available, newer RPM databases might not be cataloged") log.Warnf("sqlite driver is not available, newer RPM databases might not be cataloged")
} }
return generic.NewCataloger("rpm-db-cataloger"). return generic.NewCataloger(DBCatalogerName).
WithParserByGlobs(parseRpmDB, pkg.RpmDBGlob). WithParserByGlobs(parseRpmDB, pkg.RpmDBGlob).
WithParserByGlobs(parseRpmManifest, pkg.RpmManifestGlob) WithParserByGlobs(parseRpmManifest, pkg.RpmManifestGlob)
} }
// NewFileCataloger returns a new RPM file cataloger object. // NewFileCataloger returns a new RPM file cataloger object.
func NewFileCataloger() *generic.Cataloger { func NewFileCataloger() *generic.Cataloger {
return generic.NewCataloger("rpm-file-cataloger"). return generic.NewCataloger(FileCatalogerName).
WithParserByGlobs(parseRpm, "**/*.rpm") WithParserByGlobs(parseRpm, "**/*.rpm")
} }

View File

@ -104,7 +104,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "squashed-scope-flag-hidden-packages", name: "squashed-scope-flag-hidden-packages",
args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage}, args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage},
assertions: []traitAssertion{ assertions: []traitAssertion{
assertPackageCount(163), assertPackageCount(162),
assertNotInOutput("vsftpd"), // hidden package assertNotInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode, assertSuccessfulReturnCode,
}, },
@ -113,7 +113,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "all-layers-scope-flag", name: "all-layers-scope-flag",
args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage}, args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage},
assertions: []traitAssertion{ assertions: []traitAssertion{
assertPackageCount(164), // packages are now deduplicated for this case assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"), assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode, assertSuccessfulReturnCode,
@ -126,7 +126,7 @@ func TestPackagesCmdFlags(t *testing.T) {
"SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers",
}, },
assertions: []traitAssertion{ assertions: []traitAssertion{
assertPackageCount(164), // packages are now deduplicated for this case assertPackageCount(163), // packages are now deduplicated for this case
assertInOutput("all-layers"), assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode, assertSuccessfulReturnCode,

View File

@ -56880,40 +56880,6 @@
] ]
} }
}, },
{
"id": "875f4d287d1bdcfd",
"name": "python",
"version": "3.6.8",
"type": "binary",
"foundBy": "binary-cataloger",
"locations": [
{
"path": "/usr/lib64/libpython3.6m.so.1.0",
"layerID": "sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8"
}
],
"licenses": [],
"language": "",
"cpes": [
"cpe:2.3:a:python_software_foundation:python:3.6.8:*:*:*:*:*:*:*",
"cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*",
"cpe:2.3:a:python:python:3.6.8:*:*:*:*:*:*:*"
],
"purl": "pkg:generic/python@3.6.8",
"metadataType": "BinaryMetadata",
"metadata": {
"matches": [
{
"classifier": "python-binary-lib",
"location": {
"path": "/usr/lib64/libpython3.6m.so.1.0",
"layerID": "sha256:eb29745b8228e1e97c01b1d5c2554a319c00a94d8dd5746a3904222ad65a13f8",
"virtualPath": "/usr/lib64/libpython3.6m.so.1.0"
}
}
]
}
},
{ {
"id": "e57db3737a1d260f", "id": "e57db3737a1d260f",
"name": "python3-dnf", "name": "python3-dnf",

View File

@ -23,7 +23,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName)
var pc *pkg.Collection var pc *pkg.Collection
for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) { for _, c := range cataloger.ImageCatalogers(defaultConfig()) {
// in case of future alteration where state is persisted, assume no dependency is safe to reuse // in case of future alteration where state is persisted, assume no dependency is safe to reuse
userInput := "docker-archive:" + tarPath userInput := "docker-archive:" + tarPath
detection, err := source.Detect(userInput, source.DefaultDetectConfig()) detection, err := source.Detect(userInput, source.DefaultDetectConfig())
@ -260,7 +260,7 @@ func TestPkgCoverageCatalogerConfiguration(t *testing.T) {
assert.Equal(t, definedLanguages, observedLanguages) assert.Equal(t, definedLanguages, observedLanguages)
// Verify that rust isn't actually an image cataloger // Verify that rust isn't actually an image cataloger
c := cataloger.DefaultConfig() c := defaultConfig()
c.Catalogers = []string{"rust"} c.Catalogers = []string{"rust"}
assert.Len(t, cataloger.ImageCatalogers(c), 0) assert.Len(t, cataloger.ImageCatalogers(c), 0)
} }

View File

@ -26,7 +26,10 @@ import (
// encode-decode-encode loop which will detect lossy behavior in both directions. // encode-decode-encode loop which will detect lossy behavior in both directions.
func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { func TestEncodeDecodeEncodeCycleComparison(t *testing.T) {
// use second image for relationships // use second image for relationships
images := []string{"image-pkg-coverage", "image-owning-package"} images := []string{
"image-pkg-coverage",
"image-owning-package",
}
tests := []struct { tests := []struct {
formatOption sbom.FormatID formatOption sbom.FormatID
redactor func(in []byte) []byte redactor func(in []byte) []byte

View File

@ -22,13 +22,13 @@ func TestPackageDeduplication(t *testing.T) {
}{ }{
{ {
scope: source.AllLayersScope, scope: source.AllLayersScope,
packageCount: 174, // without deduplication this would be 618 packageCount: 172, // without deduplication this would be 618
instanceCount: map[string]int{ instanceCount: map[string]int{
"basesystem": 1, "basesystem": 1,
"wget": 1, "wget": 1,
"curl": 2, // upgraded in the image "curl": 2, // upgraded in the image
"vsftpd": 1, "vsftpd": 1,
"httpd": 2, // rpm, binary "httpd": 1, // rpm, - we exclude binary
}, },
locationCount: map[string]int{ locationCount: map[string]int{
"basesystem-10.0-7.el7.centos": 4, "basesystem-10.0-7.el7.centos": 4,
@ -37,18 +37,18 @@ func TestPackageDeduplication(t *testing.T) {
"wget-1.14-18.el7_6.1": 3, "wget-1.14-18.el7_6.1": 3,
"vsftpd-3.0.2-29.el7_9": 2, "vsftpd-3.0.2-29.el7_9": 2,
"httpd-2.4.6-97.el7.centos.5": 1, "httpd-2.4.6-97.el7.centos.5": 1,
"httpd-2.4.6": 1, // binary // "httpd-2.4.6": 1, // binary
}, },
}, },
{ {
scope: source.SquashedScope, scope: source.SquashedScope,
packageCount: 172, packageCount: 170,
instanceCount: map[string]int{ instanceCount: map[string]int{
"basesystem": 1, "basesystem": 1,
"wget": 1, "wget": 1,
"curl": 1, // upgraded, but the most recent "curl": 1, // upgraded, but the most recent
"vsftpd": 1, "vsftpd": 1,
"httpd": 2, // rpm, binary "httpd": 1, // rpm, binary is now excluded by overlap
}, },
locationCount: map[string]int{ locationCount: map[string]int{
"basesystem-10.0-7.el7.centos": 1, "basesystem-10.0-7.el7.centos": 1,
@ -56,7 +56,7 @@ func TestPackageDeduplication(t *testing.T) {
"wget-1.14-18.el7_6.1": 1, "wget-1.14-18.el7_6.1": 1,
"vsftpd-3.0.2-29.el7_9": 1, "vsftpd-3.0.2-29.el7_9": 1,
"httpd-2.4.6-97.el7.centos.5": 1, "httpd-2.4.6-97.el7.centos.5": 1,
"httpd-2.4.6": 1, // binary // "httpd-2.4.6": 1, // binary (excluded)
}, },
}, },
} }
@ -64,7 +64,6 @@ func TestPackageDeduplication(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(string(tt.scope), func(t *testing.T) { t.Run(string(tt.scope), func(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil) sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil)
for _, p := range sbom.Artifacts.Packages.Sorted() { for _, p := range sbom.Artifacts.Packages.Sorted() {
if p.Type == pkg.BinaryPkg { if p.Type == pkg.BinaryPkg {
assert.NotEmpty(t, p.Name) assert.NotEmpty(t, p.Name)

View File

@ -7,11 +7,11 @@ import (
"github.com/anchore/syft/syft/formats/syftjson" "github.com/anchore/syft/syft/formats/syftjson"
syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model" syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
func TestPackageOwnershipRelationships(t *testing.T) { func TestPackageOwnershipRelationships(t *testing.T) {
// ensure that the json encoder is applying artifact ownership with an image that has expected ownership relationships // ensure that the json encoder is applying artifact ownership with an image that has expected ownership relationships
tests := []struct { tests := []struct {
fixture string fixture string
@ -45,3 +45,43 @@ func TestPackageOwnershipRelationships(t *testing.T) {
} }
} }
// TestPackageOwnershipExclusions catalogs each fixture image with the squashed scope and
// checks that no binary-typed "busybox" package is reported while at least one apk-typed
// "busybox" package still is.
func TestPackageOwnershipExclusions(t *testing.T) {
	tests := []struct {
		name    string
		fixture string
	}{
		{
			name:    "busybox binary is filtered based on ownership relationship",
			fixture: "image-os-binary-overlap",
		},
	}

	for _, test := range tests {
		// name the subtest after the scenario description rather than the fixture
		t.Run(test.name, func(t *testing.T) {
			sbom, _ := catalogFixtureImage(t, test.fixture, source.SquashedScope, nil)

			var binaryPackages, apkPackages []pkg.Package
			for p := range sbom.Artifacts.Packages.Enumerate() {
				if p.Name != "busybox" {
					continue
				}
				switch p.Type {
				case pkg.BinaryPkg:
					binaryPackages = append(binaryPackages, p)
				case pkg.ApkPkg:
					apkPackages = append(apkPackages, p)
				}
			}

			if len(binaryPackages) != 0 {
				packageNames := make([]string, 0, len(binaryPackages))
				for _, p := range binaryPackages {
					packageNames = append(packageNames, p.Name)
				}
				t.Errorf("expected to find no binary packages but found %d packages: %v", len(binaryPackages), packageNames)
			}
			if len(apkPackages) == 0 {
				t.Errorf("expected to find apk packages but found none")
			}
		})
	}
}

View File

@ -0,0 +1,7 @@
FROM alpine:latest
# syft should no longer show the binary package for this image:
# https://github.com/anchore/syft/issues/931
# busybox 1.36.1 binary <-- not reported synthetic package overlap
# busybox 1.36.1-r0 apk
RUN apk update && apk add busybox

View File

@ -8,6 +8,8 @@ import (
"github.com/anchore/stereoscope/pkg/imagetest" "github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft" "github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/pkg/cataloger" "github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
"github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
@ -24,7 +26,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco
theSource.Close() theSource.Close()
}) })
c := cataloger.DefaultConfig() c := defaultConfig()
c.Catalogers = catalogerCfg c.Catalogers = catalogerCfg
c.Search.Scope = scope c.Search.Scope = scope
@ -52,6 +54,16 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco
}, theSource }, theSource
} }
// defaultConfig builds the cataloger configuration shared by these test helpers:
// default search, linux kernel and python settings, a single catalog worker, and
// exclusion of binary packages that overlap by ownership switched on.
func defaultConfig() cataloger.Config {
	cfg := cataloger.Config{
		Search:      cataloger.DefaultSearchConfig(),
		LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
		Python:      python.DefaultCatalogerConfig(),
	}
	cfg.Parallelism = 1
	cfg.ExcludeBinaryOverlapByOwnership = true
	return cfg
}
func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) { func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) {
userInput := "dir:" + dir userInput := "dir:" + dir
detection, err := source.Detect(userInput, source.DefaultDetectConfig()) detection, err := source.Detect(userInput, source.DefaultDetectConfig())
@ -63,7 +75,7 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) {
}) })
// TODO: this would be better with functional options (after/during API refactor) // TODO: this would be better with functional options (after/during API refactor)
c := cataloger.DefaultConfig() c := defaultConfig()
c.Search.Scope = source.AllLayersScope c.Search.Scope = source.AllLayersScope
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c)
if err != nil { if err != nil {