mirror of
https://github.com/anchore/syft.git
synced 2026-06-17 09:48:24 +02:00
fix(dpkg): extract License field for opkg/ipkg entries (#4963)
* fix(dpkg): extract License field for opkg/ipkg entries opkg and ipkg use the dpkg cataloger but declare the package License inline in the status DB (unlike Debian dpkg, where licenses live in copyright files). The cataloger silently dropped the License field at mapstructure decode time, so all opkg-managed packages reported empty licenses. This adds the field to the intermediate decode struct and the public DpkgDBEntry, and populates licenses in newDpkgPackage using the alpine cataloger's pattern: try license.ParseExpression first to keep valid SPDX expressions whole, fall back to whitespace splitting for space-separated lists. Standard Debian dpkg status files never carry a License field per Debian policy, so the new path is a no-op for them; the existing copyright-file lookup in addLicenses is unaffected. Closes #4940 Signed-off-by: David Dashti <47575784+Dashtid@users.noreply.github.com> * remove license from dpkg metadata struct Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * restore format snapshot files Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> * add additional tests Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> --------- Signed-off-by: David Dashti <47575784+Dashtid@users.noreply.github.com> Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
00ca43d24a
commit
cff5a05681
@ -13,6 +13,7 @@ import (
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/license"
|
||||
"github.com/anchore/syft/syft/linux"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
@ -23,21 +24,26 @@ const (
|
||||
docsPath = "/usr/share/doc"
|
||||
)
|
||||
|
||||
func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package {
|
||||
// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function
|
||||
var licenses []pkg.License
|
||||
func newDpkgPackage(ctx context.Context, d dpkgExtractedMetadata, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package {
|
||||
// the License field is empty for standard Debian dpkg entries (licenses live in copyright files),
|
||||
// but opkg/ipkg derivatives carry it inline in the status DB — extract it here so those packages
|
||||
// report licenses without requiring per-package copyright lookups. The license is not persisted on
|
||||
// the final entry, so convert the raw metadata into the entry just-in-time here.
|
||||
licenses := extractDeclaredLicenses(ctx, d.License, dbLocation)
|
||||
|
||||
entry := d.toDpkgEntry()
|
||||
|
||||
locations := file.NewLocationSet(dbLocation)
|
||||
locations.Add(evidence...)
|
||||
|
||||
p := pkg.Package{
|
||||
Name: d.Package,
|
||||
Version: d.Version,
|
||||
Name: entry.Package,
|
||||
Version: entry.Version,
|
||||
Licenses: pkg.NewLicenseSet(licenses...),
|
||||
Locations: locations,
|
||||
PURL: packageURL(d, release),
|
||||
PURL: packageURL(entry, release),
|
||||
Type: pkg.DebPkg,
|
||||
Metadata: d,
|
||||
Metadata: entry,
|
||||
}
|
||||
|
||||
if resolver != nil {
|
||||
@ -55,6 +61,21 @@ func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Loca
|
||||
return p
|
||||
}
|
||||
|
||||
// extractDeclaredLicenses converts a License field from the status DB into a license set. Returns nil
|
||||
// for empty input so standard dpkg entries (which never declare License inline) incur no allocation.
|
||||
// Mirrors the alpine cataloger's approach: keep the value whole if it parses as a valid SPDX expression,
|
||||
// otherwise split on whitespace to handle space-separated lists.
|
||||
func extractDeclaredLicenses(ctx context.Context, raw string, dbLocation file.Location) []pkg.License {
|
||||
if raw == "" {
|
||||
return nil
|
||||
}
|
||||
licenseStrings := []string{raw}
|
||||
if _, err := license.ParseExpression(raw); err != nil {
|
||||
licenseStrings = strings.Fields(raw)
|
||||
}
|
||||
return pkg.NewLicensesFromLocationWithContext(ctx, dbLocation, licenseStrings...)
|
||||
}
|
||||
|
||||
func newDebArchivePackage(ctx context.Context, location file.Location, metadata pkg.DpkgArchiveEntry, licenseStrings []string) pkg.Package {
|
||||
p := pkg.Package{
|
||||
Name: metadata.Package,
|
||||
|
||||
@ -1,10 +1,14 @@
|
||||
package debian
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/linux"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
@ -111,3 +115,97 @@ func Test_packageURL(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_extractDeclaredLicenses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dbLocation := file.NewLocation("/var/lib/opkg/status")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
raw string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "empty input returns nil",
|
||||
raw: "",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
name: "single SPDX identifier kept whole",
|
||||
raw: "MIT",
|
||||
expected: []string{"MIT"},
|
||||
},
|
||||
{
|
||||
name: "valid SPDX expression kept whole",
|
||||
raw: "Apache-2.0 OR MIT",
|
||||
expected: []string{"Apache-2.0 OR MIT"},
|
||||
},
|
||||
{
|
||||
name: "non-expression space-separated list is split",
|
||||
raw: "GPL-2.0 BSD-3-Clause",
|
||||
expected: []string{"GPL-2.0", "BSD-3-Clause"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
got := extractDeclaredLicenses(ctx, test.raw, dbLocation)
|
||||
var gotValues []string
|
||||
for _, l := range got {
|
||||
gotValues = append(gotValues, l.Value)
|
||||
}
|
||||
// NewLicensesFromLocationWithContext does not guarantee output order
|
||||
sort.Strings(gotValues)
|
||||
want := append([]string(nil), test.expected...)
|
||||
sort.Strings(want)
|
||||
if diff := cmp.Diff(want, gotValues); diff != "" {
|
||||
t.Errorf("unexpected licenses (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_newDpkgPackage_declaredLicense(t *testing.T) {
|
||||
// the License field is not persisted on pkg.DpkgDBEntry, so this guards that the inline opkg/ipkg license
|
||||
// declared on the raw metadata still flows into the built package's license set
|
||||
tests := []struct {
|
||||
name string
|
||||
metadata dpkgExtractedMetadata
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "no declared license",
|
||||
metadata: dpkgExtractedMetadata{Package: "apt", Version: "1.8.2"},
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
name: "inline license flows to package",
|
||||
metadata: dpkgExtractedMetadata{Package: "dropbear", Version: "2024.85-r0", License: "MIT"},
|
||||
expected: []string{"MIT"},
|
||||
},
|
||||
{
|
||||
name: "space-separated licenses split into the set",
|
||||
metadata: dpkgExtractedMetadata{Package: "busybox", Version: "1.36.1", License: "GPL-2.0 BSD-3-Clause"},
|
||||
expected: []string{"BSD-3-Clause", "GPL-2.0"},
|
||||
},
|
||||
{
|
||||
name: "valid SPDX expression kept whole",
|
||||
metadata: dpkgExtractedMetadata{Package: "curl", Version: "8.5.0", License: "Apache-2.0 OR MIT"},
|
||||
expected: []string{"Apache-2.0 OR MIT"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
p := newDpkgPackage(context.Background(), test.metadata, file.NewLocation("/var/lib/opkg/status"), nil, nil)
|
||||
|
||||
var got []string
|
||||
for _, l := range p.Licenses.ToSlice() {
|
||||
got = append(got, l.Value)
|
||||
}
|
||||
// the license set does not guarantee output order
|
||||
sort.Strings(got)
|
||||
require.Equal(t, test.expected, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,7 +131,7 @@ func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) {
|
||||
if len(entries) == 0 {
|
||||
return nil, fmt.Errorf("no package entries found in control file")
|
||||
}
|
||||
entry := pkg.DpkgArchiveEntry(entries[0])
|
||||
entry := pkg.DpkgArchiveEntry(entries[0].toDpkgEntry())
|
||||
metadata = &entry
|
||||
case "md5sums":
|
||||
// parseDpkgMD5Info already streams via bufio.Scanner
|
||||
|
||||
@ -42,7 +42,7 @@ func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Envir
|
||||
|
||||
dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
|
||||
var pkgs []pkg.Package
|
||||
_ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m pkg.DpkgDBEntry) (pkg.Package, error) {
|
||||
_ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m dpkgExtractedMetadata) (pkg.Package, error) {
|
||||
return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil
|
||||
}, &pkgs)
|
||||
|
||||
@ -77,10 +77,12 @@ func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Loca
|
||||
return locations
|
||||
}
|
||||
|
||||
// parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
|
||||
func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) {
|
||||
// parseDpkgStatus is a parser function for Debian DB status contents, returning the raw metadata for all Debian
|
||||
// packages listed. Conversion to pkg.DpkgDBEntry is deferred to the package-building stage so that fields which
|
||||
// are not part of the final entry (e.g. License) remain available to the caller.
|
||||
func parseDpkgStatus(reader io.Reader) ([]dpkgExtractedMetadata, error) {
|
||||
buffedReader := bufio.NewReader(reader)
|
||||
var metadata []pkg.DpkgDBEntry
|
||||
var metadata []dpkgExtractedMetadata
|
||||
|
||||
continueProcessing := true
|
||||
for continueProcessing {
|
||||
@ -117,10 +119,41 @@ type dpkgExtractedMetadata struct {
|
||||
Depends string `mapstructure:"Depends"`
|
||||
PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends
|
||||
Status string `mapstructure:"Status"`
|
||||
License string `mapstructure:"License"`
|
||||
Conffiles string `mapstructure:"Conffiles"`
|
||||
}
|
||||
|
||||
func (d *dpkgExtractedMetadata) toDpkgEntry() pkg.DpkgDBEntry {
|
||||
entry := pkg.DpkgDBEntry{
|
||||
Package: d.Package,
|
||||
Source: d.Source,
|
||||
Version: d.Version,
|
||||
SourceVersion: d.SourceVersion,
|
||||
Architecture: d.Architecture,
|
||||
Maintainer: d.Maintainer,
|
||||
InstalledSize: d.InstalledSize,
|
||||
Description: d.Description,
|
||||
Provides: splitPkgList(d.Provides),
|
||||
Depends: splitPkgList(d.Depends),
|
||||
PreDepends: splitPkgList(d.PreDepends),
|
||||
// note: licenses and conffiles are handled separately
|
||||
}
|
||||
|
||||
// there may be an optional conffiles section that we should persist as files
|
||||
if d.Conffiles != "" {
|
||||
entry.Files = parseDpkgConffileInfo(strings.NewReader(d.Conffiles))
|
||||
}
|
||||
|
||||
if entry.Files == nil {
|
||||
// ensure the default value for a collection is never nil since this may be shown as JSON
|
||||
entry.Files = make([]pkg.DpkgFileRecord, 0)
|
||||
}
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
// parseDpkgStatusEntry returns the raw extracted metadata for an individual dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
|
||||
func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
|
||||
func parseDpkgStatusEntry(reader *bufio.Reader) (*dpkgExtractedMetadata, error) {
|
||||
var retErr error
|
||||
dpkgFields, err := extractAllFields(reader)
|
||||
if err != nil {
|
||||
@ -154,33 +187,7 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
|
||||
return nil, retErr
|
||||
}
|
||||
|
||||
entry := pkg.DpkgDBEntry{
|
||||
Package: raw.Package,
|
||||
Source: raw.Source,
|
||||
Version: raw.Version,
|
||||
SourceVersion: raw.SourceVersion,
|
||||
Architecture: raw.Architecture,
|
||||
Maintainer: raw.Maintainer,
|
||||
InstalledSize: raw.InstalledSize,
|
||||
Description: raw.Description,
|
||||
Provides: splitPkgList(raw.Provides),
|
||||
Depends: splitPkgList(raw.Depends),
|
||||
PreDepends: splitPkgList(raw.PreDepends),
|
||||
}
|
||||
|
||||
// there may be an optional conffiles section that we should persist as files
|
||||
if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
|
||||
if sectionStr, ok := conffilesSection.(string); ok {
|
||||
entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
|
||||
}
|
||||
}
|
||||
|
||||
if entry.Files == nil {
|
||||
// ensure the default value for a collection is never nil since this may be shown as JSON
|
||||
entry.Files = make([]pkg.DpkgFileRecord, 0)
|
||||
}
|
||||
|
||||
return &entry, retErr
|
||||
return &raw, retErr
|
||||
}
|
||||
|
||||
func splitPkgList(pkgList string) (ret []string) {
|
||||
|
||||
@ -22,6 +22,9 @@ func Test_parseDpkgStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
expected []pkg.DpkgDBEntry
|
||||
// expectedLicenses is the raw License value parsed for each entry (parallel to expected). License is not
|
||||
// persisted on pkg.DpkgDBEntry, so it is asserted separately from the raw extracted metadata.
|
||||
expectedLicenses []string
|
||||
fixturePath string
|
||||
}{
|
||||
{
|
||||
@ -237,6 +240,37 @@ func Test_parseDpkgStatus(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "opkg status with license field",
|
||||
fixturePath: "testdata/var/lib/opkg/status",
|
||||
expected: []pkg.DpkgDBEntry{
|
||||
{
|
||||
Package: "dropbear",
|
||||
Version: "2024.85-r0",
|
||||
Architecture: "x86_64",
|
||||
Description: "Small SSH server and client.",
|
||||
Depends: []string{"libc", "zlib"},
|
||||
Files: []pkg.DpkgFileRecord{},
|
||||
},
|
||||
{
|
||||
Package: "busybox",
|
||||
Version: "1.36.1-r3",
|
||||
Architecture: "x86_64",
|
||||
Description: "Single executable providing many common UNIX utilities.",
|
||||
Depends: []string{"libc"},
|
||||
Files: []pkg.DpkgFileRecord{},
|
||||
},
|
||||
{
|
||||
Package: "kernel-modules",
|
||||
Version: "6.6.0-r0",
|
||||
Architecture: "x86_64",
|
||||
Description: "Loadable kernel modules with mixed licensing.",
|
||||
Depends: []string{"kmod"},
|
||||
Files: []pkg.DpkgFileRecord{},
|
||||
},
|
||||
},
|
||||
expectedLicenses: []string{"MIT", "GPL-2.0-only", "GPL-2.0 BSD-3-Clause"},
|
||||
},
|
||||
{
|
||||
name: "deinstall status packages are ignored",
|
||||
fixturePath: "testdata/var/lib/dpkg/status.d/deinstall",
|
||||
@ -278,12 +312,24 @@ func Test_parseDpkgStatus(t *testing.T) {
|
||||
|
||||
reader := bufio.NewReader(f)
|
||||
|
||||
entries, err := parseDpkgStatus(reader)
|
||||
raw, err := parseDpkgStatus(reader)
|
||||
require.NoError(t, err)
|
||||
|
||||
// convert the raw metadata into the final entries just-in-time, mirroring the package-building stage
|
||||
var entries []pkg.DpkgDBEntry
|
||||
var licenses []string
|
||||
for _, r := range raw {
|
||||
entries = append(entries, r.toDpkgEntry())
|
||||
licenses = append(licenses, r.License)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(test.expected, entries); diff != "" {
|
||||
t.Errorf("unexpected entry (-want +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if test.expectedLicenses != nil {
|
||||
require.Equal(t, test.expectedLicenses, licenses)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
30
syft/pkg/cataloger/debian/testdata/var/lib/opkg/status
vendored
Normal file
30
syft/pkg/cataloger/debian/testdata/var/lib/opkg/status
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
Package: dropbear
|
||||
Version: 2024.85-r0
|
||||
Depends: libc, zlib
|
||||
Status: install user installed
|
||||
Architecture: x86_64
|
||||
Installed-Time: 1714492800
|
||||
License: MIT
|
||||
Section: net
|
||||
Description: Small SSH server and client.
|
||||
|
||||
Package: busybox
|
||||
Version: 1.36.1-r3
|
||||
Depends: libc
|
||||
Status: install user installed
|
||||
Architecture: x86_64
|
||||
Installed-Time: 1714492800
|
||||
License: GPL-2.0-only
|
||||
Section: base
|
||||
Description: Single executable providing many common UNIX utilities.
|
||||
|
||||
Package: kernel-modules
|
||||
Version: 6.6.0-r0
|
||||
Depends: kmod
|
||||
Status: install user installed
|
||||
Architecture: x86_64
|
||||
Installed-Time: 1714492800
|
||||
License: GPL-2.0 BSD-3-Clause
|
||||
Section: kernel
|
||||
Description: Loadable kernel modules with mixed licensing.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user