diff --git a/syft/pkg/cataloger/debian/package.go b/syft/pkg/cataloger/debian/package.go index 88102da6e..364963516 100644 --- a/syft/pkg/cataloger/debian/package.go +++ b/syft/pkg/cataloger/debian/package.go @@ -13,6 +13,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" ) @@ -23,21 +24,26 @@ const ( docsPath = "/usr/share/doc" ) -func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package { - // TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function - var licenses []pkg.License +func newDpkgPackage(ctx context.Context, d dpkgExtractedMetadata, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package { + // the License field is empty for standard Debian dpkg entries (licenses live in copyright files), + // but opkg/ipkg derivatives carry it inline in the status DB — extract it here so those packages + // report licenses without requiring per-package copyright lookups. The license is not persisted on + // the final entry, so convert the raw metadata into the entry just-in-time here. + licenses := extractDeclaredLicenses(ctx, d.License, dbLocation) + + entry := d.toDpkgEntry() locations := file.NewLocationSet(dbLocation) locations.Add(evidence...) 
p := pkg.Package{ - Name: d.Package, - Version: d.Version, + Name: entry.Package, + Version: entry.Version, Licenses: pkg.NewLicenseSet(licenses...), Locations: locations, - PURL: packageURL(d, release), + PURL: packageURL(entry, release), Type: pkg.DebPkg, - Metadata: d, + Metadata: entry, } if resolver != nil { @@ -55,6 +61,21 @@ func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Loca return p } +// extractDeclaredLicenses converts a License field from the status DB into a []pkg.License. Returns nil +// for empty input, so entries that do not declare License inline skip license construction entirely. +// Mirrors the alpine cataloger's approach: keep the value whole if it parses as a valid SPDX expression, +// otherwise split on whitespace to handle space-separated lists. +func extractDeclaredLicenses(ctx context.Context, raw string, dbLocation file.Location) []pkg.License { + if raw == "" { + return nil + } + licenseStrings := []string{raw} + if _, err := license.ParseExpression(raw); err != nil { + licenseStrings = strings.Fields(raw) + } + return pkg.NewLicensesFromLocationWithContext(ctx, dbLocation, licenseStrings...) 
+} + func newDebArchivePackage(ctx context.Context, location file.Location, metadata pkg.DpkgArchiveEntry, licenseStrings []string) pkg.Package { p := pkg.Package{ Name: metadata.Package, diff --git a/syft/pkg/cataloger/debian/package_test.go b/syft/pkg/cataloger/debian/package_test.go index a52659739..227ebc9d5 100644 --- a/syft/pkg/cataloger/debian/package_test.go +++ b/syft/pkg/cataloger/debian/package_test.go @@ -1,10 +1,14 @@ package debian import ( + "context" + "sort" "testing" "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" ) @@ -111,3 +115,97 @@ func Test_packageURL(t *testing.T) { }) } } + +func Test_extractDeclaredLicenses(t *testing.T) { + ctx := context.Background() + dbLocation := file.NewLocation("/var/lib/opkg/status") + + tests := []struct { + name string + raw string + expected []string + }{ + { + name: "empty input returns nil", + raw: "", + expected: nil, + }, + { + name: "single SPDX identifier kept whole", + raw: "MIT", + expected: []string{"MIT"}, + }, + { + name: "valid SPDX expression kept whole", + raw: "Apache-2.0 OR MIT", + expected: []string{"Apache-2.0 OR MIT"}, + }, + { + name: "non-expression space-separated list is split", + raw: "GPL-2.0 BSD-3-Clause", + expected: []string{"GPL-2.0", "BSD-3-Clause"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got := extractDeclaredLicenses(ctx, test.raw, dbLocation) + var gotValues []string + for _, l := range got { + gotValues = append(gotValues, l.Value) + } + // sort both sides so the comparison does not depend on the order licenses are returned in + sort.Strings(gotValues) + want := append([]string(nil), test.expected...) 
+ sort.Strings(want) + if diff := cmp.Diff(want, gotValues); diff != "" { + t.Errorf("unexpected licenses (-want +got):\n%s", diff) + } + }) + } +} + +func Test_newDpkgPackage_declaredLicense(t *testing.T) { + // the License field is not persisted on pkg.DpkgDBEntry, so this guards that the inline opkg/ipkg license + // declared on the raw metadata still flows into the built package's license set + tests := []struct { + name string + metadata dpkgExtractedMetadata + expected []string + }{ + { + name: "no declared license", + metadata: dpkgExtractedMetadata{Package: "apt", Version: "1.8.2"}, + expected: nil, + }, + { + name: "inline license flows to package", + metadata: dpkgExtractedMetadata{Package: "dropbear", Version: "2024.85-r0", License: "MIT"}, + expected: []string{"MIT"}, + }, + { + name: "space-separated licenses split into the set", + metadata: dpkgExtractedMetadata{Package: "busybox", Version: "1.36.1", License: "GPL-2.0 BSD-3-Clause"}, + expected: []string{"BSD-3-Clause", "GPL-2.0"}, + }, + { + name: "valid SPDX expression kept whole", + metadata: dpkgExtractedMetadata{Package: "curl", Version: "8.5.0", License: "Apache-2.0 OR MIT"}, + expected: []string{"Apache-2.0 OR MIT"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + p := newDpkgPackage(context.Background(), test.metadata, file.NewLocation("/var/lib/opkg/status"), nil, nil) + + var got []string + for _, l := range p.Licenses.ToSlice() { + got = append(got, l.Value) + } + // sort so the comparison does not depend on the order ToSlice returns licenses in + sort.Strings(got) + require.Equal(t, test.expected, got) + }) + } +} diff --git a/syft/pkg/cataloger/debian/parse_deb_archive.go b/syft/pkg/cataloger/debian/parse_deb_archive.go index 0f977c05e..b3b5f923f 100644 --- a/syft/pkg/cataloger/debian/parse_deb_archive.go +++ b/syft/pkg/cataloger/debian/parse_deb_archive.go @@ -131,7 +131,7 @@ func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) { if len(entries) == 0 { return 
nil, fmt.Errorf("no package entries found in control file") } - entry := pkg.DpkgArchiveEntry(entries[0]) + entry := pkg.DpkgArchiveEntry(entries[0].toDpkgEntry()) metadata = &entry case "md5sums": // parseDpkgMD5Info already streams via bufio.Scanner diff --git a/syft/pkg/cataloger/debian/parse_dpkg_db.go b/syft/pkg/cataloger/debian/parse_dpkg_db.go index 1a2e72e50..be9f04448 100644 --- a/syft/pkg/cataloger/debian/parse_dpkg_db.go +++ b/syft/pkg/cataloger/debian/parse_dpkg_db.go @@ -42,7 +42,7 @@ func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Envir dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation) var pkgs []pkg.Package - _ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m pkg.DpkgDBEntry) (pkg.Package, error) { + _ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m dpkgExtractedMetadata) (pkg.Package, error) { return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil }, &pkgs) @@ -77,10 +77,12 @@ func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Loca return locations } -// parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. -func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { +// parseDpkgStatus is a parser function for Debian DB status contents, returning the raw metadata for all Debian +// packages listed. Conversion to pkg.DpkgDBEntry is deferred to the package-building stage so that fields which +// are not part of the final entry (e.g. License) remain available to the caller. 
+func parseDpkgStatus(reader io.Reader) ([]dpkgExtractedMetadata, error) { buffedReader := bufio.NewReader(reader) - var metadata []pkg.DpkgDBEntry + var metadata []dpkgExtractedMetadata continueProcessing := true for continueProcessing { @@ -117,10 +119,41 @@ type dpkgExtractedMetadata struct { Depends string `mapstructure:"Depends"` PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends Status string `mapstructure:"Status"` + License string `mapstructure:"License"` + Conffiles string `mapstructure:"Conffiles"` +} + +func (d *dpkgExtractedMetadata) toDpkgEntry() pkg.DpkgDBEntry { + entry := pkg.DpkgDBEntry{ + Package: d.Package, + Source: d.Source, + Version: d.Version, + SourceVersion: d.SourceVersion, + Architecture: d.Architecture, + Maintainer: d.Maintainer, + InstalledSize: d.InstalledSize, + Description: d.Description, + Provides: splitPkgList(d.Provides), + Depends: splitPkgList(d.Depends), + PreDepends: splitPkgList(d.PreDepends), + // note: License is deliberately not copied onto the entry; Conffiles is converted into Files below + } + + // there may be an optional conffiles section that we should persist as files + if d.Conffiles != "" { + entry.Files = parseDpkgConffileInfo(strings.NewReader(d.Conffiles)) + } + + if entry.Files == nil { + // ensure the default value for a collection is never nil since this may be shown as JSON + entry.Files = make([]pkg.DpkgFileRecord, 0) + } + + return entry } // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 
-func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { +func parseDpkgStatusEntry(reader *bufio.Reader) (*dpkgExtractedMetadata, error) { var retErr error dpkgFields, err := extractAllFields(reader) if err != nil { @@ -154,33 +187,7 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { return nil, retErr } - entry := pkg.DpkgDBEntry{ - Package: raw.Package, - Source: raw.Source, - Version: raw.Version, - SourceVersion: raw.SourceVersion, - Architecture: raw.Architecture, - Maintainer: raw.Maintainer, - InstalledSize: raw.InstalledSize, - Description: raw.Description, - Provides: splitPkgList(raw.Provides), - Depends: splitPkgList(raw.Depends), - PreDepends: splitPkgList(raw.PreDepends), - } - - // there may be an optional conffiles section that we should persist as files - if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { - if sectionStr, ok := conffilesSection.(string); ok { - entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) - } - } - - if entry.Files == nil { - // ensure the default value for a collection is never nil since this may be shown as JSON - entry.Files = make([]pkg.DpkgFileRecord, 0) - } - - return &entry, retErr + return &raw, retErr } func splitPkgList(pkgList string) (ret []string) { diff --git a/syft/pkg/cataloger/debian/parse_dpkg_db_test.go b/syft/pkg/cataloger/debian/parse_dpkg_db_test.go index 1cca3d9f6..e423c0a08 100644 --- a/syft/pkg/cataloger/debian/parse_dpkg_db_test.go +++ b/syft/pkg/cataloger/debian/parse_dpkg_db_test.go @@ -20,9 +20,12 @@ import ( func Test_parseDpkgStatus(t *testing.T) { tests := []struct { - name string - expected []pkg.DpkgDBEntry - fixturePath string + name string + expected []pkg.DpkgDBEntry + // expectedLicenses is the raw License value parsed for each entry (parallel to expected). License is not + // persisted on pkg.DpkgDBEntry, so it is asserted separately from the raw extracted metadata. 
+ expectedLicenses []string + fixturePath string }{ { name: "single package", @@ -237,6 +240,37 @@ func Test_parseDpkgStatus(t *testing.T) { }, }, }, + { + name: "opkg status with license field", + fixturePath: "testdata/var/lib/opkg/status", + expected: []pkg.DpkgDBEntry{ + { + Package: "dropbear", + Version: "2024.85-r0", + Architecture: "x86_64", + Description: "Small SSH server and client.", + Depends: []string{"libc", "zlib"}, + Files: []pkg.DpkgFileRecord{}, + }, + { + Package: "busybox", + Version: "1.36.1-r3", + Architecture: "x86_64", + Description: "Single executable providing many common UNIX utilities.", + Depends: []string{"libc"}, + Files: []pkg.DpkgFileRecord{}, + }, + { + Package: "kernel-modules", + Version: "6.6.0-r0", + Architecture: "x86_64", + Description: "Loadable kernel modules with mixed licensing.", + Depends: []string{"kmod"}, + Files: []pkg.DpkgFileRecord{}, + }, + }, + expectedLicenses: []string{"MIT", "GPL-2.0-only", "GPL-2.0 BSD-3-Clause"}, + }, { name: "deinstall status packages are ignored", fixturePath: "testdata/var/lib/dpkg/status.d/deinstall", @@ -278,12 +312,24 @@ func Test_parseDpkgStatus(t *testing.T) { reader := bufio.NewReader(f) - entries, err := parseDpkgStatus(reader) + raw, err := parseDpkgStatus(reader) require.NoError(t, err) + // convert the raw metadata into the final entries just-in-time, mirroring the package-building stage + var entries []pkg.DpkgDBEntry + var licenses []string + for _, r := range raw { + entries = append(entries, r.toDpkgEntry()) + licenses = append(licenses, r.License) + } + if diff := cmp.Diff(test.expected, entries); diff != "" { t.Errorf("unexpected entry (-want +got):\n%s", diff) } + + if test.expectedLicenses != nil { + require.Equal(t, test.expectedLicenses, licenses) + } }) } } diff --git a/syft/pkg/cataloger/debian/testdata/var/lib/opkg/status b/syft/pkg/cataloger/debian/testdata/var/lib/opkg/status new file mode 100644 index 000000000..ed4b15038 --- /dev/null +++ 
b/syft/pkg/cataloger/debian/testdata/var/lib/opkg/status @@ -0,0 +1,30 @@ +Package: dropbear +Version: 2024.85-r0 +Depends: libc, zlib +Status: install user installed +Architecture: x86_64 +Installed-Time: 1714492800 +License: MIT +Section: net +Description: Small SSH server and client. + +Package: busybox +Version: 1.36.1-r3 +Depends: libc +Status: install user installed +Architecture: x86_64 +Installed-Time: 1714492800 +License: GPL-2.0-only +Section: base +Description: Single executable providing many common UNIX utilities. + +Package: kernel-modules +Version: 6.6.0-r0 +Depends: kmod +Status: install user installed +Architecture: x86_64 +Installed-Time: 1714492800 +License: GPL-2.0 BSD-3-Clause +Section: kernel +Description: Loadable kernel modules with mixed licensing. +