chore: update feature to include config to restore previous behavior

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2025-09-26 14:16:49 -04:00
parent 9b60b3e33d
commit 6fa1831484
No known key found for this signature in database
9 changed files with 160 additions and 11 deletions

View File

@ -19,6 +19,7 @@ import (
"github.com/anchore/syft/syft/file/cataloger/executable" "github.com/anchore/syft/syft/file/cataloger/executable"
"github.com/anchore/syft/syft/file/cataloger/filecontent" "github.com/anchore/syft/syft/file/cataloger/filecontent"
"github.com/anchore/syft/syft/pkg/cataloger/binary" "github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/debian"
"github.com/anchore/syft/syft/pkg/cataloger/dotnet" "github.com/anchore/syft/syft/pkg/cataloger/dotnet"
"github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java" "github.com/anchore/syft/syft/pkg/cataloger/java"
@ -44,6 +45,7 @@ type Catalog struct {
Enrich []string `yaml:"enrich" json:"enrich" mapstructure:"enrich"` Enrich []string `yaml:"enrich" json:"enrich" mapstructure:"enrich"`
// ecosystem-specific cataloger configuration // ecosystem-specific cataloger configuration
Debian debianConfig `yaml:"debian" json:"debian" mapstructure:"debian"`
Dotnet dotnetConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"` Dotnet dotnetConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"`
Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"` Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"`
Java javaConfig `yaml:"java" json:"java" mapstructure:"java"` Java javaConfig `yaml:"java" json:"java" mapstructure:"java"`
@ -76,6 +78,7 @@ func DefaultCatalog() Catalog {
Scope: source.SquashedScope.String(), Scope: source.SquashedScope.String(),
Package: defaultPackageConfig(), Package: defaultPackageConfig(),
License: defaultLicenseConfig(), License: defaultLicenseConfig(),
Debian: defaultDebianConfig(),
LinuxKernel: defaultLinuxKernelConfig(), LinuxKernel: defaultLinuxKernelConfig(),
Nix: defaultNixConfig(), Nix: defaultNixConfig(),
Dotnet: defaultDotnetConfig(), Dotnet: defaultDotnetConfig(),
@ -170,6 +173,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
} }
return pkgcataloging.Config{ return pkgcataloging.Config{
Binary: binary.DefaultClassifierCatalogerConfig(), Binary: binary.DefaultClassifierCatalogerConfig(),
Debian: debian.DefaultCatalogerConfig().
WithIncludeDeInstalled(cfg.Debian.IncludeDeInstalled),
Dotnet: dotnet.DefaultCatalogerConfig(). Dotnet: dotnet.DefaultCatalogerConfig().
WithDepPackagesMustHaveDLL(cfg.Dotnet.DepPackagesMustHaveDLL). WithDepPackagesMustHaveDLL(cfg.Dotnet.DepPackagesMustHaveDLL).
WithDepPackagesMustClaimDLL(cfg.Dotnet.DepPackagesMustClaimDLL). WithDepPackagesMustClaimDLL(cfg.Dotnet.DepPackagesMustClaimDLL).

View File

@ -0,0 +1,26 @@
package options
import (
"github.com/anchore/clio"
"github.com/anchore/syft/syft/pkg/cataloger/debian"
)
// debianConfig holds the application-level options for the debian package catalogers.
type debianConfig struct {
// IncludeDeInstalled, when true, keeps debian packages that have been removed but not purged
// (deinstall status); when false those packages are left out of the results.
IncludeDeInstalled bool `yaml:"include-deinstalled" json:"include-deinstalled" mapstructure:"include-deinstalled"`
}
// compile-time check: *debianConfig must satisfy clio.FieldDescriber.
var _ clio.FieldDescriber = (*debianConfig)(nil)
// DescribeFields registers a human-readable description for the IncludeDeInstalled
// field with the given description set.
func (o *debianConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
	const includeDeInstalledDescription = `include debian packages that have been removed but not purged (deinstall status)
by default these packages are excluded from the SBOM`

	descriptions.Add(&o.IncludeDeInstalled, includeDeInstalledDescription)
}
// defaultDebianConfig returns the application defaults for the debian catalogers,
// taking the IncludeDeInstalled value from debian.DefaultCatalogerConfig().
func defaultDebianConfig() debianConfig {
	return debianConfig{
		IncludeDeInstalled: debian.DefaultCatalogerConfig().IncludeDeInstalled,
	}
}

View File

@ -59,13 +59,23 @@ func DefaultPackageTaskFactories() Factories {
// OS package installed catalogers /////////////////////////////////////////////////////////////////////////// // OS package installed catalogers ///////////////////////////////////////////////////////////////////////////
newSimplePackageTaskFactory(arch.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "alpm", "archlinux"), newSimplePackageTaskFactory(arch.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "alpm", "archlinux"),
newSimplePackageTaskFactory(alpine.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "apk", "alpine"), newSimplePackageTaskFactory(alpine.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "apk", "alpine"),
newSimplePackageTaskFactory(debian.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "dpkg", "debian"), newPackageTaskFactory(
func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return debian.NewDBCatalogerWithOpts(cfg.PackagesConfig.Debian)
},
pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "dpkg", "debian",
),
newSimplePackageTaskFactory(gentoo.NewPortageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "portage", "gentoo"), newSimplePackageTaskFactory(gentoo.NewPortageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "portage", "gentoo"),
newSimplePackageTaskFactory(redhat.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), newSimplePackageTaskFactory(redhat.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"),
// OS package declared catalogers /////////////////////////////////////////////////////////////////////////// // OS package declared catalogers ///////////////////////////////////////////////////////////////////////////
newSimplePackageTaskFactory(redhat.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), newSimplePackageTaskFactory(redhat.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"),
newSimplePackageTaskFactory(debian.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "deb", "debian"), newPackageTaskFactory(
func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return debian.NewArchiveCatalogerWithOpts(cfg.PackagesConfig.Debian)
},
pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "deb", "debian",
),
// language-specific package installed catalogers /////////////////////////////////////////////////////////////////////////// // language-specific package installed catalogers ///////////////////////////////////////////////////////////////////////////
newSimplePackageTaskFactory(cpp.NewConanInfoCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "cpp", "conan"), newSimplePackageTaskFactory(cpp.NewConanInfoCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "cpp", "conan"),

View File

@ -2,6 +2,7 @@ package pkgcataloging
import ( import (
"github.com/anchore/syft/syft/pkg/cataloger/binary" "github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/debian"
"github.com/anchore/syft/syft/pkg/cataloger/dotnet" "github.com/anchore/syft/syft/pkg/cataloger/dotnet"
"github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java" "github.com/anchore/syft/syft/pkg/cataloger/java"
@ -13,6 +14,7 @@ import (
type Config struct { type Config struct {
Binary binary.ClassifierCatalogerConfig `yaml:"binary" json:"binary" mapstructure:"binary"` Binary binary.ClassifierCatalogerConfig `yaml:"binary" json:"binary" mapstructure:"binary"`
Debian debian.CatalogerConfig `yaml:"debian" json:"debian" mapstructure:"debian"`
Dotnet dotnet.CatalogerConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"` Dotnet dotnet.CatalogerConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"`
Golang golang.CatalogerConfig `yaml:"golang" json:"golang" mapstructure:"golang"` Golang golang.CatalogerConfig `yaml:"golang" json:"golang" mapstructure:"golang"`
JavaArchive java.ArchiveCatalogerConfig `yaml:"java-archive" json:"java-archive" mapstructure:"java-archive"` JavaArchive java.ArchiveCatalogerConfig `yaml:"java-archive" json:"java-archive" mapstructure:"java-archive"`
@ -25,6 +27,7 @@ type Config struct {
func DefaultConfig() Config { func DefaultConfig() Config {
return Config{ return Config{
Binary: binary.DefaultClassifierCatalogerConfig(), Binary: binary.DefaultClassifierCatalogerConfig(),
Debian: debian.DefaultCatalogerConfig(),
Dotnet: dotnet.DefaultCatalogerConfig(), Dotnet: dotnet.DefaultCatalogerConfig(),
Golang: golang.DefaultCatalogerConfig(), Golang: golang.DefaultCatalogerConfig(),
JavaArchive: java.DefaultArchiveCatalogerConfig(), JavaArchive: java.DefaultArchiveCatalogerConfig(),
@ -73,3 +76,8 @@ func (c Config) WithJavaArchiveConfig(cfg java.ArchiveCatalogerConfig) Config {
c.JavaArchive = cfg c.JavaArchive = cfg
return c return c
} }
// WithDebianConfig returns a copy of the configuration whose Debian cataloger
// settings are replaced by cfg. The receiver is passed by value and is not modified.
func (c Config) WithDebianConfig(cfg debian.CatalogerConfig) Config {
	updated := c
	updated.Debian = cfg
	return updated
}

View File

@ -14,12 +14,27 @@ func NewDBCataloger() pkg.Cataloger {
return generic.NewCataloger("dpkg-db-cataloger"). return generic.NewCataloger("dpkg-db-cataloger").
// note: these globs have been intentionally split up in order to improve search performance, // note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(parseDpkgDB, "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status"). WithParserByGlobs(newDpkgDBParser(CatalogerConfig{IncludeDeInstalled: true}), "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status").
WithProcessors(dependency.Processor(dbEntryDependencySpecifier))
}
// NewDBCatalogerWithOpts returns a new Deb package cataloger capable of parsing DPKG status DB flat-file stores with custom configuration.
func NewDBCatalogerWithOpts(cfg CatalogerConfig) pkg.Cataloger {
return generic.NewCataloger("dpkg-db-cataloger").
// note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(newDpkgDBParser(cfg), "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status").
WithProcessors(dependency.Processor(dbEntryDependencySpecifier)) WithProcessors(dependency.Processor(dbEntryDependencySpecifier))
} }
// NewArchiveCataloger returns a new Debian package cataloger object capable of parsing .deb archive files // NewArchiveCataloger returns a new Debian package cataloger object capable of parsing .deb archive files
func NewArchiveCataloger() pkg.Cataloger { func NewArchiveCataloger() pkg.Cataloger {
return generic.NewCataloger("deb-archive-cataloger"). return generic.NewCataloger("deb-archive-cataloger").
WithParserByGlobs(parseDebArchive, "**/*.deb") WithParserByGlobs(newDebArchiveParser(CatalogerConfig{IncludeDeInstalled: true}), "**/*.deb")
}
// NewArchiveCatalogerWithOpts returns a new Debian package cataloger object capable of parsing .deb archive files with custom configuration.
func NewArchiveCatalogerWithOpts(cfg CatalogerConfig) pkg.Cataloger {
return generic.NewCataloger("deb-archive-cataloger").
WithParserByGlobs(newDebArchiveParser(cfg), "**/*.deb")
} }

View File

@ -0,0 +1,16 @@
package debian
// CatalogerConfig holds the options that tune the debian package catalogers.
type CatalogerConfig struct {
	// IncludeDeInstalled selects whether dpkg status entries carrying the deinstall status
	// (removed but not purged) are kept (true) or skipped (false).
	IncludeDeInstalled bool `yaml:"include-deinstalled" json:"include-deinstalled" mapstructure:"include-deinstalled"`
}

// DefaultCatalogerConfig returns the default debian cataloger configuration, in which
// IncludeDeInstalled is false.
func DefaultCatalogerConfig() CatalogerConfig {
	return CatalogerConfig{}.WithIncludeDeInstalled(false)
}

// WithIncludeDeInstalled returns a copy of the configuration with IncludeDeInstalled set to
// include. The receiver is passed by value and is not modified.
func (c CatalogerConfig) WithIncludeDeInstalled(include bool) CatalogerConfig {
	updated := c
	updated.IncludeDeInstalled = include
	return updated
}

View File

@ -21,6 +21,12 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
// newDebArchiveParser returns a generic.Parser that forwards every call to
// parseDebArchiveWithConfig with cfg bound as the cataloger configuration.
func newDebArchiveParser(cfg CatalogerConfig) generic.Parser {
	parseWithCfg := func(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
		return parseDebArchiveWithConfig(ctx, resolver, env, reader, cfg)
	}
	return parseWithCfg
}
// parseDebArchive parses a Debian package archive (.deb) file and returns the packages it contains. // parseDebArchive parses a Debian package archive (.deb) file and returns the packages it contains.
// A .deb file is an ar archive containing three main files: // A .deb file is an ar archive containing three main files:
// - debian-binary: Version of the .deb format (usually "2.0") // - debian-binary: Version of the .deb format (usually "2.0")
@ -29,6 +35,10 @@ import (
// //
// This function extracts and processes the control information to create package metadata. // This function extracts and processes the control information to create package metadata.
func parseDebArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func parseDebArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
return parseDebArchiveWithConfig(ctx, nil, nil, reader, DefaultCatalogerConfig())
}
func parseDebArchiveWithConfig(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser, cfg CatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
arReader := ar.NewReader(reader) arReader := ar.NewReader(reader)
var metadata *pkg.DpkgArchiveEntry var metadata *pkg.DpkgArchiveEntry

View File

@ -33,9 +33,14 @@ var (
sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
) )
// parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found. func newDpkgDBParser(cfg CatalogerConfig) generic.Parser {
func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { return func(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
metadata, err := parseDpkgStatus(reader) return parseDpkgDBWithConfig(ctx, resolver, env, reader, cfg)
}
}
func parseDpkgDBWithConfig(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser, cfg CatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
metadata, err := parseDpkgStatusWithConfig(reader, cfg)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
} }
@ -77,14 +82,13 @@ func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Loca
return locations return locations
} }
// parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. func parseDpkgStatusWithConfig(reader io.Reader, cfg CatalogerConfig) ([]pkg.DpkgDBEntry, error) {
func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) {
buffedReader := bufio.NewReader(reader) buffedReader := bufio.NewReader(reader)
var metadata []pkg.DpkgDBEntry var metadata []pkg.DpkgDBEntry
continueProcessing := true continueProcessing := true
for continueProcessing { for continueProcessing {
entry, err := parseDpkgStatusEntry(buffedReader) entry, err := parseDpkgStatusEntryWithConfig(buffedReader, cfg)
if err != nil { if err != nil {
if errors.Is(err, errEndOfPackages) { if errors.Is(err, errEndOfPackages) {
continueProcessing = false continueProcessing = false
@ -121,6 +125,10 @@ type dpkgExtractedMetadata struct {
// parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
return parseDpkgStatusEntryWithConfig(reader, DefaultCatalogerConfig())
}
func parseDpkgStatusEntryWithConfig(reader *bufio.Reader, cfg CatalogerConfig) (*pkg.DpkgDBEntry, error) {
var retErr error var retErr error
dpkgFields, err := extractAllFields(reader) dpkgFields, err := extractAllFields(reader)
if err != nil { if err != nil {
@ -140,7 +148,7 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
} }
// Skip entries which have been removed but not purged, e.g. "rc" status in dpkg -l // Skip entries which have been removed but not purged, e.g. "rc" status in dpkg -l
if strings.Contains(raw.Status, deinstallStatus) { if !cfg.IncludeDeInstalled && strings.Contains(raw.Status, deinstallStatus) {
return nil, nil return nil, nil
} }

View File

@ -494,3 +494,54 @@ func abstractRelationships(t testing.TB, relationships []artifact.Relationship)
return abstracted return abstracted
} }
// Test_parseDpkgStatus_deinstall checks that CatalogerConfig.IncludeDeInstalled decides whether
// the deinstalled package from the "deinstall" status fixture is returned by
// parseDpkgStatusWithConfig: one entry is expected when it is excluded, two when it is included.
func Test_parseDpkgStatus_deinstall(t *testing.T) {
	const (
		fixturePath        = "test-fixtures/var/lib/dpkg/status.d/deinstall"
		deinstalledPackage = "linux-image-6.8.0-1029-aws"
	)

	cases := []struct {
		name             string
		includeDeinstall bool
		expectedCount    int
	}{
		{name: "exclude deinstalled packages (default)", includeDeinstall: false, expectedCount: 1},
		{name: "include deinstalled packages", includeDeinstall: true, expectedCount: 2},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			statusFile, err := os.Open(fixturePath)
			require.NoError(t, err)
			defer statusFile.Close()

			entries, err := parseDpkgStatusWithConfig(statusFile, CatalogerConfig{IncludeDeInstalled: tc.includeDeinstall})
			require.NoError(t, err)

			assert.Len(t, entries, tc.expectedCount, "expected %d entries", tc.expectedCount)

			// excluded: no returned entry may be the deinstalled package
			if !tc.includeDeinstall {
				for _, entry := range entries {
					assert.NotEqual(t, deinstalledPackage, entry.Package, "should not find deinstalled package when includeDeinstall=false")
				}
				return
			}

			// included: the deinstalled package must be among the returned entries
			found := false
			for _, entry := range entries {
				if entry.Package == deinstalledPackage {
					found = true
					break
				}
			}
			assert.True(t, found, "should find deinstalled package when includeDeinstall=true")
		})
	}
}