fix(dpkg): extract License field for opkg/ipkg entries (#4963)

* fix(dpkg): extract License field for opkg/ipkg entries

opkg and ipkg use the dpkg cataloger but declare the package License
inline in the status DB (unlike Debian dpkg, where licenses live in
copyright files). The cataloger silently dropped the License field at
mapstructure decode time, so all opkg-managed packages reported empty
licenses.

This adds the field to the intermediate decode struct and the public
DpkgDBEntry, and populates licenses in newDpkgPackage using the alpine
cataloger's pattern: try license.ParseExpression first to keep valid
SPDX expressions whole, fall back to whitespace splitting for
space-separated lists.

Standard Debian dpkg status files never carry a License field per
Debian policy, so the new path is a no-op for them; the existing
copyright-file lookup in addLicenses is unaffected.

Closes #4940

Signed-off-by: David Dashti <47575784+Dashtid@users.noreply.github.com>

* remove license from dpkg metadata struct

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* restore format snapshot files

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add additional tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: David Dashti <47575784+Dashtid@users.noreply.github.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
David Dashti 2026-06-15 22:15:32 +02:00 committed by GitHub
parent 00ca43d24a
commit cff5a05681
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 246 additions and 44 deletions

View File

@ -13,6 +13,7 @@ import (
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
@ -23,21 +24,26 @@ const (
docsPath = "/usr/share/doc" docsPath = "/usr/share/doc"
) )
func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package { func newDpkgPackage(ctx context.Context, d dpkgExtractedMetadata, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package {
// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function // the License field is empty for standard Debian dpkg entries (licenses live in copyright files),
var licenses []pkg.License // but opkg/ipkg derivatives carry it inline in the status DB — extract it here so those packages
// report licenses without requiring per-package copyright lookups. The license is not persisted on
// the final entry, so convert the raw metadata into the entry just-in-time here.
licenses := extractDeclaredLicenses(ctx, d.License, dbLocation)
entry := d.toDpkgEntry()
locations := file.NewLocationSet(dbLocation) locations := file.NewLocationSet(dbLocation)
locations.Add(evidence...) locations.Add(evidence...)
p := pkg.Package{ p := pkg.Package{
Name: d.Package, Name: entry.Package,
Version: d.Version, Version: entry.Version,
Licenses: pkg.NewLicenseSet(licenses...), Licenses: pkg.NewLicenseSet(licenses...),
Locations: locations, Locations: locations,
PURL: packageURL(d, release), PURL: packageURL(entry, release),
Type: pkg.DebPkg, Type: pkg.DebPkg,
Metadata: d, Metadata: entry,
} }
if resolver != nil { if resolver != nil {
@ -55,6 +61,21 @@ func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Loca
return p return p
} }
// extractDeclaredLicenses turns the raw License value read from the status DB into licenses that are
// attributed to dbLocation.
//
// An empty value yields nil, which is the case for standard dpkg entries since they do not declare a
// License inline. Otherwise the same strategy as the alpine cataloger is applied: a value that parses
// as a valid SPDX expression is kept as one license, while anything else is treated as a
// whitespace-separated list with one license per token.
func extractDeclaredLicenses(ctx context.Context, raw string, dbLocation file.Location) []pkg.License {
	if len(raw) == 0 {
		return nil
	}

	if _, parseErr := license.ParseExpression(raw); parseErr == nil {
		// a valid expression must not be split apart, otherwise operators such as OR would be lost
		return pkg.NewLicensesFromLocationWithContext(ctx, dbLocation, raw)
	}

	// not an expression: fall back to one license per whitespace-delimited token
	return pkg.NewLicensesFromLocationWithContext(ctx, dbLocation, strings.Fields(raw)...)
}
func newDebArchivePackage(ctx context.Context, location file.Location, metadata pkg.DpkgArchiveEntry, licenseStrings []string) pkg.Package { func newDebArchivePackage(ctx context.Context, location file.Location, metadata pkg.DpkgArchiveEntry, licenseStrings []string) pkg.Package {
p := pkg.Package{ p := pkg.Package{
Name: metadata.Package, Name: metadata.Package,

View File

@ -1,10 +1,14 @@
package debian package debian
import ( import (
"context"
"sort"
"testing" "testing"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
@ -111,3 +115,97 @@ func Test_packageURL(t *testing.T) {
}) })
} }
} }
// Test_extractDeclaredLicenses verifies how a raw License value from the status DB is turned into
// license values: empty input yields nil, valid SPDX expressions stay whole, and anything else is split
// on whitespace.
func Test_extractDeclaredLicenses(t *testing.T) {
	ctx := context.Background()
	dbLocation := file.NewLocation("/var/lib/opkg/status")

	tests := []struct {
		name     string
		raw      string
		expected []string
	}{
		{
			name:     "empty input returns nil",
			raw:      "",
			expected: nil,
		},
		{
			name:     "single SPDX identifier kept whole",
			raw:      "MIT",
			expected: []string{"MIT"},
		},
		{
			name:     "valid SPDX expression kept whole",
			raw:      "Apache-2.0 OR MIT",
			expected: []string{"Apache-2.0 OR MIT"},
		},
		{
			name:     "non-expression space-separated list is split",
			raw:      "GPL-2.0 BSD-3-Clause",
			expected: []string{"GPL-2.0", "BSD-3-Clause"},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			got := extractDeclaredLicenses(ctx, test.raw, dbLocation)

			// comparing the collected values alone cannot tell a nil result from an empty one, so the
			// documented "nil for empty input" contract is asserted explicitly
			if test.expected == nil && got != nil {
				t.Errorf("expected a nil license slice, got %d entries", len(got))
			}

			var gotValues []string
			for _, l := range got {
				gotValues = append(gotValues, l.Value)
			}

			// NewLicensesFromLocationWithContext does not guarantee output order
			sort.Strings(gotValues)
			want := append([]string(nil), test.expected...)
			sort.Strings(want)

			if diff := cmp.Diff(want, gotValues); diff != "" {
				t.Errorf("unexpected licenses (-want +got):\n%s", diff)
			}
		})
	}
}
// Test_newDpkgPackage_declaredLicense guards that a License declared inline on the raw opkg/ipkg
// metadata ends up in the license set of the built package, even though the field is not persisted on
// pkg.DpkgDBEntry.
func Test_newDpkgPackage_declaredLicense(t *testing.T) {
	cases := []struct {
		name     string
		metadata dpkgExtractedMetadata
		expected []string
	}{
		{
			name:     "no declared license",
			metadata: dpkgExtractedMetadata{Package: "apt", Version: "1.8.2"},
			expected: nil,
		},
		{
			name:     "inline license flows to package",
			metadata: dpkgExtractedMetadata{Package: "dropbear", Version: "2024.85-r0", License: "MIT"},
			expected: []string{"MIT"},
		},
		{
			name:     "space-separated licenses split into the set",
			metadata: dpkgExtractedMetadata{Package: "busybox", Version: "1.36.1", License: "GPL-2.0 BSD-3-Clause"},
			expected: []string{"BSD-3-Clause", "GPL-2.0"},
		},
		{
			name:     "valid SPDX expression kept whole",
			metadata: dpkgExtractedMetadata{Package: "curl", Version: "8.5.0", License: "Apache-2.0 OR MIT"},
			expected: []string{"Apache-2.0 OR MIT"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// no resolver and no release: only the inline license path is exercised
			built := newDpkgPackage(context.Background(), tc.metadata, file.NewLocation("/var/lib/opkg/status"), nil, nil)

			var values []string
			for _, lic := range built.Licenses.ToSlice() {
				values = append(values, lic.Value)
			}

			// sort before comparing since the license set does not guarantee output order
			sort.Strings(values)
			require.Equal(t, tc.expected, values)
		})
	}
}

View File

@ -131,7 +131,7 @@ func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) {
if len(entries) == 0 { if len(entries) == 0 {
return nil, fmt.Errorf("no package entries found in control file") return nil, fmt.Errorf("no package entries found in control file")
} }
entry := pkg.DpkgArchiveEntry(entries[0]) entry := pkg.DpkgArchiveEntry(entries[0].toDpkgEntry())
metadata = &entry metadata = &entry
case "md5sums": case "md5sums":
// parseDpkgMD5Info already streams via bufio.Scanner // parseDpkgMD5Info already streams via bufio.Scanner

View File

@ -42,7 +42,7 @@ func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Envir
dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation) dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
var pkgs []pkg.Package var pkgs []pkg.Package
_ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m pkg.DpkgDBEntry) (pkg.Package, error) { _ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m dpkgExtractedMetadata) (pkg.Package, error) {
return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil
}, &pkgs) }, &pkgs)
@ -77,10 +77,12 @@ func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Loca
return locations return locations
} }
// parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. // parseDpkgStatus is a parser function for Debian DB status contents, returning the raw metadata for all Debian
func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { // packages listed. Conversion to pkg.DpkgDBEntry is deferred to the package-building stage so that fields which
// are not part of the final entry (e.g. License) remain available to the caller.
func parseDpkgStatus(reader io.Reader) ([]dpkgExtractedMetadata, error) {
buffedReader := bufio.NewReader(reader) buffedReader := bufio.NewReader(reader)
var metadata []pkg.DpkgDBEntry var metadata []dpkgExtractedMetadata
continueProcessing := true continueProcessing := true
for continueProcessing { for continueProcessing {
@ -117,10 +119,41 @@ type dpkgExtractedMetadata struct {
Depends string `mapstructure:"Depends"` Depends string `mapstructure:"Depends"`
PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends
Status string `mapstructure:"Status"` Status string `mapstructure:"Status"`
License string `mapstructure:"License"`
Conffiles string `mapstructure:"Conffiles"`
}
func (d *dpkgExtractedMetadata) toDpkgEntry() pkg.DpkgDBEntry {
entry := pkg.DpkgDBEntry{
Package: d.Package,
Source: d.Source,
Version: d.Version,
SourceVersion: d.SourceVersion,
Architecture: d.Architecture,
Maintainer: d.Maintainer,
InstalledSize: d.InstalledSize,
Description: d.Description,
Provides: splitPkgList(d.Provides),
Depends: splitPkgList(d.Depends),
PreDepends: splitPkgList(d.PreDepends),
// note: licenses and conffiles are handled separately
}
// there may be an optional conffiles section that we should persist as files
if d.Conffiles != "" {
entry.Files = parseDpkgConffileInfo(strings.NewReader(d.Conffiles))
}
if entry.Files == nil {
// ensure the default value for a collection is never nil since this may be shown as JSON
entry.Files = make([]pkg.DpkgFileRecord, 0)
}
return entry
} }
// parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { func parseDpkgStatusEntry(reader *bufio.Reader) (*dpkgExtractedMetadata, error) {
var retErr error var retErr error
dpkgFields, err := extractAllFields(reader) dpkgFields, err := extractAllFields(reader)
if err != nil { if err != nil {
@ -154,33 +187,7 @@ func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
return nil, retErr return nil, retErr
} }
entry := pkg.DpkgDBEntry{ return &raw, retErr
Package: raw.Package,
Source: raw.Source,
Version: raw.Version,
SourceVersion: raw.SourceVersion,
Architecture: raw.Architecture,
Maintainer: raw.Maintainer,
InstalledSize: raw.InstalledSize,
Description: raw.Description,
Provides: splitPkgList(raw.Provides),
Depends: splitPkgList(raw.Depends),
PreDepends: splitPkgList(raw.PreDepends),
}
// there may be an optional conffiles section that we should persist as files
if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
if sectionStr, ok := conffilesSection.(string); ok {
entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
}
}
if entry.Files == nil {
// ensure the default value for a collection is never nil since this may be shown as JSON
entry.Files = make([]pkg.DpkgFileRecord, 0)
}
return &entry, retErr
} }
func splitPkgList(pkgList string) (ret []string) { func splitPkgList(pkgList string) (ret []string) {

View File

@ -22,6 +22,9 @@ func Test_parseDpkgStatus(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
expected []pkg.DpkgDBEntry expected []pkg.DpkgDBEntry
// expectedLicenses is the raw License value parsed for each entry (parallel to expected). License is not
// persisted on pkg.DpkgDBEntry, so it is asserted separately from the raw extracted metadata.
expectedLicenses []string
fixturePath string fixturePath string
}{ }{
{ {
@ -237,6 +240,37 @@ func Test_parseDpkgStatus(t *testing.T) {
}, },
}, },
}, },
{
name: "opkg status with license field",
fixturePath: "testdata/var/lib/opkg/status",
expected: []pkg.DpkgDBEntry{
{
Package: "dropbear",
Version: "2024.85-r0",
Architecture: "x86_64",
Description: "Small SSH server and client.",
Depends: []string{"libc", "zlib"},
Files: []pkg.DpkgFileRecord{},
},
{
Package: "busybox",
Version: "1.36.1-r3",
Architecture: "x86_64",
Description: "Single executable providing many common UNIX utilities.",
Depends: []string{"libc"},
Files: []pkg.DpkgFileRecord{},
},
{
Package: "kernel-modules",
Version: "6.6.0-r0",
Architecture: "x86_64",
Description: "Loadable kernel modules with mixed licensing.",
Depends: []string{"kmod"},
Files: []pkg.DpkgFileRecord{},
},
},
expectedLicenses: []string{"MIT", "GPL-2.0-only", "GPL-2.0 BSD-3-Clause"},
},
{ {
name: "deinstall status packages are ignored", name: "deinstall status packages are ignored",
fixturePath: "testdata/var/lib/dpkg/status.d/deinstall", fixturePath: "testdata/var/lib/dpkg/status.d/deinstall",
@ -278,12 +312,24 @@ func Test_parseDpkgStatus(t *testing.T) {
reader := bufio.NewReader(f) reader := bufio.NewReader(f)
entries, err := parseDpkgStatus(reader) raw, err := parseDpkgStatus(reader)
require.NoError(t, err) require.NoError(t, err)
// convert the raw metadata into the final entries just-in-time, mirroring the package-building stage
var entries []pkg.DpkgDBEntry
var licenses []string
for _, r := range raw {
entries = append(entries, r.toDpkgEntry())
licenses = append(licenses, r.License)
}
if diff := cmp.Diff(test.expected, entries); diff != "" { if diff := cmp.Diff(test.expected, entries); diff != "" {
t.Errorf("unexpected entry (-want +got):\n%s", diff) t.Errorf("unexpected entry (-want +got):\n%s", diff)
} }
if test.expectedLicenses != nil {
require.Equal(t, test.expectedLicenses, licenses)
}
}) })
} }
} }

View File

@ -0,0 +1,30 @@
Package: dropbear
Version: 2024.85-r0
Depends: libc, zlib
Status: install user installed
Architecture: x86_64
Installed-Time: 1714492800
License: MIT
Section: net
Description: Small SSH server and client.
Package: busybox
Version: 1.36.1-r3
Depends: libc
Status: install user installed
Architecture: x86_64
Installed-Time: 1714492800
License: GPL-2.0-only
Section: base
Description: Single executable providing many common UNIX utilities.
Package: kernel-modules
Version: 6.6.0-r0
Depends: kmod
Status: install user installed
Architecture: x86_64
Installed-Time: 1714492800
License: GPL-2.0 BSD-3-Clause
Section: kernel
Description: Loadable kernel modules with mixed licensing.