From 6fa18314849ccbe48cf403dc87c88860cb15aeb4 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:16:49 -0400 Subject: [PATCH] chore: update feature to include config to restore previous behavior Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- cmd/syft/internal/options/catalog.go | 5 ++ cmd/syft/internal/options/debian.go | 26 ++++++++++ internal/task/package_tasks.go | 14 ++++- syft/cataloging/pkgcataloging/config.go | 8 +++ syft/pkg/cataloger/debian/cataloger.go | 19 ++++++- syft/pkg/cataloger/debian/config.go | 16 ++++++ .../pkg/cataloger/debian/parse_deb_archive.go | 10 ++++ syft/pkg/cataloger/debian/parse_dpkg_db.go | 22 +++++--- .../cataloger/debian/parse_dpkg_db_test.go | 51 +++++++++++++++++++ 9 files changed, 160 insertions(+), 11 deletions(-) create mode 100644 cmd/syft/internal/options/debian.go create mode 100644 syft/pkg/cataloger/debian/config.go diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index 652d6a40d..454ab4f7d 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -19,6 +19,7 @@ import ( "github.com/anchore/syft/syft/file/cataloger/executable" "github.com/anchore/syft/syft/file/cataloger/filecontent" "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/debian" "github.com/anchore/syft/syft/pkg/cataloger/dotnet" "github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/pkg/cataloger/java" @@ -44,6 +45,7 @@ type Catalog struct { Enrich []string `yaml:"enrich" json:"enrich" mapstructure:"enrich"` // ecosystem-specific cataloger configuration + Debian debianConfig `yaml:"debian" json:"debian" mapstructure:"debian"` Dotnet dotnetConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"` Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"` Java javaConfig `yaml:"java" 
json:"java" mapstructure:"java"` @@ -76,6 +78,7 @@ func DefaultCatalog() Catalog { Scope: source.SquashedScope.String(), Package: defaultPackageConfig(), License: defaultLicenseConfig(), + Debian: defaultDebianConfig(), LinuxKernel: defaultLinuxKernelConfig(), Nix: defaultNixConfig(), Dotnet: defaultDotnetConfig(), @@ -170,6 +173,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { } return pkgcataloging.Config{ Binary: binary.DefaultClassifierCatalogerConfig(), + Debian: debian.DefaultCatalogerConfig(). + WithIncludeDeInstalled(cfg.Debian.IncludeDeInstalled), Dotnet: dotnet.DefaultCatalogerConfig(). WithDepPackagesMustHaveDLL(cfg.Dotnet.DepPackagesMustHaveDLL). WithDepPackagesMustClaimDLL(cfg.Dotnet.DepPackagesMustClaimDLL). diff --git a/cmd/syft/internal/options/debian.go b/cmd/syft/internal/options/debian.go new file mode 100644 index 000000000..a822fb52f --- /dev/null +++ b/cmd/syft/internal/options/debian.go @@ -0,0 +1,26 @@ +package options + +import ( + "github.com/anchore/clio" + "github.com/anchore/syft/syft/pkg/cataloger/debian" +) + +type debianConfig struct { + IncludeDeInstalled bool `yaml:"include-deinstalled" json:"include-deinstalled" mapstructure:"include-deinstalled"` +} + +var _ interface { + clio.FieldDescriber +} = (*debianConfig)(nil) + +func (o *debianConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { + descriptions.Add(&o.IncludeDeInstalled, `include debian packages that have been removed but not purged (deinstall status) +by default these packages are excluded from the SBOM`) +} + +func defaultDebianConfig() debianConfig { + def := debian.DefaultCatalogerConfig() + return debianConfig{ + IncludeDeInstalled: def.IncludeDeInstalled, + } +} \ No newline at end of file diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go index 6ba932724..903078e5e 100644 --- a/internal/task/package_tasks.go +++ b/internal/task/package_tasks.go @@ -59,13 +59,23 @@ func DefaultPackageTaskFactories() Factories { // 
OS package installed catalogers /////////////////////////////////////////////////////////////////////////// newSimplePackageTaskFactory(arch.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "alpm", "archlinux"), newSimplePackageTaskFactory(alpine.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "apk", "alpine"), - newSimplePackageTaskFactory(debian.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "dpkg", "debian"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return debian.NewDBCatalogerWithOpts(cfg.PackagesConfig.Debian) + }, + pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "dpkg", "debian", + ), newSimplePackageTaskFactory(gentoo.NewPortageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "portage", "gentoo"), newSimplePackageTaskFactory(redhat.NewDBCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), // OS package declared catalogers /////////////////////////////////////////////////////////////////////////// newSimplePackageTaskFactory(redhat.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "rpm", "redhat"), - newSimplePackageTaskFactory(debian.NewArchiveCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "deb", "debian"), + newPackageTaskFactory( + func(cfg CatalogingFactoryConfig) pkg.Cataloger { + return debian.NewArchiveCatalogerWithOpts(cfg.PackagesConfig.Debian) + }, + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.OSTag, "linux", "deb", "debian", + ), // 
language-specific package installed catalogers /////////////////////////////////////////////////////////////////////////// newSimplePackageTaskFactory(cpp.NewConanInfoCataloger, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "cpp", "conan"), diff --git a/syft/cataloging/pkgcataloging/config.go b/syft/cataloging/pkgcataloging/config.go index e5bac0cb8..9cdd3e4fb 100644 --- a/syft/cataloging/pkgcataloging/config.go +++ b/syft/cataloging/pkgcataloging/config.go @@ -2,6 +2,7 @@ package pkgcataloging import ( "github.com/anchore/syft/syft/pkg/cataloger/binary" + "github.com/anchore/syft/syft/pkg/cataloger/debian" "github.com/anchore/syft/syft/pkg/cataloger/dotnet" "github.com/anchore/syft/syft/pkg/cataloger/golang" "github.com/anchore/syft/syft/pkg/cataloger/java" @@ -13,6 +14,7 @@ import ( type Config struct { Binary binary.ClassifierCatalogerConfig `yaml:"binary" json:"binary" mapstructure:"binary"` + Debian debian.CatalogerConfig `yaml:"debian" json:"debian" mapstructure:"debian"` Dotnet dotnet.CatalogerConfig `yaml:"dotnet" json:"dotnet" mapstructure:"dotnet"` Golang golang.CatalogerConfig `yaml:"golang" json:"golang" mapstructure:"golang"` JavaArchive java.ArchiveCatalogerConfig `yaml:"java-archive" json:"java-archive" mapstructure:"java-archive"` @@ -25,6 +27,7 @@ type Config struct { func DefaultConfig() Config { return Config{ Binary: binary.DefaultClassifierCatalogerConfig(), + Debian: debian.DefaultCatalogerConfig(), Dotnet: dotnet.DefaultCatalogerConfig(), Golang: golang.DefaultCatalogerConfig(), JavaArchive: java.DefaultArchiveCatalogerConfig(), @@ -73,3 +76,8 @@ func (c Config) WithJavaArchiveConfig(cfg java.ArchiveCatalogerConfig) Config { c.JavaArchive = cfg return c } + +func (c Config) WithDebianConfig(cfg debian.CatalogerConfig) Config { + c.Debian = cfg + return c +} diff --git a/syft/pkg/cataloger/debian/cataloger.go b/syft/pkg/cataloger/debian/cataloger.go index 183b9d95c..d98bb00dd 100644 --- 
a/syft/pkg/cataloger/debian/cataloger.go +++ b/syft/pkg/cataloger/debian/cataloger.go @@ -14,12 +14,27 @@ func NewDBCataloger() pkg.Cataloger { return generic.NewCataloger("dpkg-db-cataloger"). // note: these globs have been intentionally split up in order to improve search performance, // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" - WithParserByGlobs(parseDpkgDB, "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status"). + WithParserByGlobs(newDpkgDBParser(CatalogerConfig{IncludeDeInstalled: true}), "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status"). + WithProcessors(dependency.Processor(dbEntryDependencySpecifier)) +} + +// NewDBCatalogerWithOpts returns a new Deb package cataloger capable of parsing DPKG status DB flat-file stores with custom configuration. +func NewDBCatalogerWithOpts(cfg CatalogerConfig) pkg.Cataloger { + return generic.NewCataloger("dpkg-db-cataloger"). + // note: these globs have been intentionally split up in order to improve search performance, + // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" + WithParserByGlobs(newDpkgDBParser(cfg), "**/lib/dpkg/status", "**/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status"). WithProcessors(dependency.Processor(dbEntryDependencySpecifier)) } // NewArchiveCataloger returns a new Debian package cataloger object capable of parsing .deb archive files func NewArchiveCataloger() pkg.Cataloger { return generic.NewCataloger("deb-archive-cataloger"). - WithParserByGlobs(parseDebArchive, "**/*.deb") + WithParserByGlobs(newDebArchiveParser(CatalogerConfig{IncludeDeInstalled: true}), "**/*.deb") +} + +// NewArchiveCatalogerWithOpts returns a new Debian package cataloger object capable of parsing .deb archive files with custom configuration. 
+func NewArchiveCatalogerWithOpts(cfg CatalogerConfig) pkg.Cataloger { + return generic.NewCataloger("deb-archive-cataloger"). + WithParserByGlobs(newDebArchiveParser(cfg), "**/*.deb") } diff --git a/syft/pkg/cataloger/debian/config.go b/syft/pkg/cataloger/debian/config.go new file mode 100644 index 000000000..6073ee64f --- /dev/null +++ b/syft/pkg/cataloger/debian/config.go @@ -0,0 +1,16 @@ +package debian + +type CatalogerConfig struct { + IncludeDeInstalled bool `yaml:"include-deinstalled" json:"include-deinstalled" mapstructure:"include-deinstalled"` +} + +func DefaultCatalogerConfig() CatalogerConfig { + return CatalogerConfig{ + IncludeDeInstalled: false, + } +} + +func (c CatalogerConfig) WithIncludeDeInstalled(include bool) CatalogerConfig { + c.IncludeDeInstalled = include + return c +} \ No newline at end of file diff --git a/syft/pkg/cataloger/debian/parse_deb_archive.go b/syft/pkg/cataloger/debian/parse_deb_archive.go index 8b78a2147..96a7a591a 100644 --- a/syft/pkg/cataloger/debian/parse_deb_archive.go +++ b/syft/pkg/cataloger/debian/parse_deb_archive.go @@ -21,6 +21,12 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) +func newDebArchiveParser(cfg CatalogerConfig) generic.Parser { + return func(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + return parseDebArchiveWithConfig(ctx, resolver, env, reader, cfg) + } +} + // parseDebArchive parses a Debian package archive (.deb) file and returns the packages it contains. // A .deb file is an ar archive containing three main files: // - debian-binary: Version of the .deb format (usually "2.0") @@ -29,6 +35,10 @@ import ( // // This function extracts and processes the control information to create package metadata. 
func parseDebArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + return parseDebArchiveWithConfig(ctx, nil, nil, reader, DefaultCatalogerConfig()) +} + +func parseDebArchiveWithConfig(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser, cfg CatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { arReader := ar.NewReader(reader) var metadata *pkg.DpkgArchiveEntry diff --git a/syft/pkg/cataloger/debian/parse_dpkg_db.go b/syft/pkg/cataloger/debian/parse_dpkg_db.go index 27dc0978d..2f026ef0d 100644 --- a/syft/pkg/cataloger/debian/parse_dpkg_db.go +++ b/syft/pkg/cataloger/debian/parse_dpkg_db.go @@ -33,9 +33,14 @@ var ( sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) ) -// parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found. -func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - metadata, err := parseDpkgStatus(reader) +func newDpkgDBParser(cfg CatalogerConfig) generic.Parser { + return func(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + return parseDpkgDBWithConfig(ctx, resolver, env, reader, cfg) + } +} + +func parseDpkgDBWithConfig(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser, cfg CatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { + metadata, err := parseDpkgStatusWithConfig(reader, cfg) if err != nil { return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) } @@ -77,14 +82,13 @@ func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Loca return locations } -// parseDpkgStatus is a parser 
function for Debian DB status contents, returning all Debian packages listed. -func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { +func parseDpkgStatusWithConfig(reader io.Reader, cfg CatalogerConfig) ([]pkg.DpkgDBEntry, error) { buffedReader := bufio.NewReader(reader) var metadata []pkg.DpkgDBEntry continueProcessing := true for continueProcessing { - entry, err := parseDpkgStatusEntry(buffedReader) + entry, err := parseDpkgStatusEntryWithConfig(buffedReader, cfg) if err != nil { if errors.Is(err, errEndOfPackages) { continueProcessing = false @@ -121,6 +125,10 @@ type dpkgExtractedMetadata struct { // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { + return parseDpkgStatusEntryWithConfig(reader, DefaultCatalogerConfig()) +} + +func parseDpkgStatusEntryWithConfig(reader *bufio.Reader, cfg CatalogerConfig) (*pkg.DpkgDBEntry, error) { var retErr error dpkgFields, err := extractAllFields(reader) if err != nil { @@ -140,7 +148,7 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { } // Skip entries which have been removed but not purged, e.g. 
"rc" status in dpkg -l - if strings.Contains(raw.Status, deinstallStatus) { + if !cfg.IncludeDeInstalled && strings.Contains(raw.Status, deinstallStatus) { return nil, nil } diff --git a/syft/pkg/cataloger/debian/parse_dpkg_db_test.go b/syft/pkg/cataloger/debian/parse_dpkg_db_test.go index c0659a0e0..1e509d95b 100644 --- a/syft/pkg/cataloger/debian/parse_dpkg_db_test.go +++ b/syft/pkg/cataloger/debian/parse_dpkg_db_test.go @@ -494,3 +494,54 @@ func abstractRelationships(t testing.TB, relationships []artifact.Relationship) return abstracted } + +func Test_parseDpkgStatus_deinstall(t *testing.T) { + tests := []struct { + name string + includeDeinstall bool + expectedCount int + }{ + { + name: "exclude deinstalled packages (default)", + includeDeinstall: false, + expectedCount: 1, + }, + { + name: "include deinstalled packages", + includeDeinstall: true, + expectedCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fixture, err := os.Open("test-fixtures/var/lib/dpkg/status.d/deinstall") + require.NoError(t, err) + defer fixture.Close() + + cfg := CatalogerConfig{ + IncludeDeInstalled: tt.includeDeinstall, + } + + entries, err := parseDpkgStatusWithConfig(fixture, cfg) + require.NoError(t, err) + + assert.Len(t, entries, tt.expectedCount, "expected %d entries", tt.expectedCount) + + if tt.includeDeinstall { + var foundDeinstalled bool + for _, entry := range entries { + if entry.Package == "linux-image-6.8.0-1029-aws" { + foundDeinstalled = true + break + } + } + assert.True(t, foundDeinstalled, "should find deinstalled package when includeDeinstall=true") + } else { + for _, entry := range entries { + assert.NotEqual(t, "linux-image-6.8.0-1029-aws", entry.Package, "should not find deinstalled package when includeDeinstall=false") + } + } + }) + } +}