fix: correctly report supporting evidence for binary packages (#4558)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2026-01-23 13:01:12 -05:00 committed by GitHub
parent 27b1219e98
commit 836f358cd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 177 additions and 56 deletions

View File

@ -109,16 +109,6 @@ catalogers:
cpes:
- cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*
type: BinaryPkg
- method: glob
criteria:
- '**/VERSION*'
packages:
- class: go-binary-hint
name: go
purl: pkg:generic/go
cpes:
- cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*
type: BinaryPkg
- method: glob
criteria:
- '**/busybox'

View File

@ -64,6 +64,10 @@ func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Pac
var relationships []artifact.Relationship
var errs error
// we do not run these classifiers in parallel currently because: when determining primary vs. supporting evidence,
// we take preference to the classifier that was defined first, if we modify this to run in parallel,
// we need to retain this behavior by giving precedence to the classifier defined first in the c.classifiers list;
// if this is ever made parallel, we will need to account for this to have deterministic behavior
for _, cls := range c.classifiers {
log.WithFields("classifier", cls.Class).Trace("cataloging binaries")
newPkgs, err := catalog(resolver, cls)
@ -101,8 +105,26 @@ func mergePackages(target *pkg.Package, extra *pkg.Package) {
if extra.Type != pkg.BinaryPkg && target.Type == pkg.BinaryPkg {
target.Type = extra.Type
}
// add the locations
target.Locations.Add(extra.Locations.ToSlice()...)
addedEvidence := false
// when merging locations together, we need to maintain primary vs. supporting evidence -
// if we are merging two packages together that have overlapping evidence, e.g. libpython
// which are found by 2 different classifiers, we want to deduplicate evidence for the same
// locations when merging so libpython is not considered primary evidence. This allows cases
// where we find python binary with version info coming from libpython to deduplicate the libpython
// entries, but also surface results for libpython separately as primary evidence when there is
// no python binary referencing it
for _, location := range extra.Locations.ToSlice() {
// if we already have the same location, don't include duplicates
if target.Locations.Contains(location) {
continue
}
addedEvidence = true
target.Locations.Add(location)
}
// only include the additional metadata if we added evidence, as it was likely duplicated e.g. libpython
if !addedEvidence {
return
}
// update the metadata to indicate which classifiers were used
meta, _ := target.Metadata.(pkg.BinarySignature)
if m, ok := extra.Metadata.(pkg.BinarySignature); ok {

View File

@ -6,6 +6,7 @@ import (
"flag"
"fmt"
"io"
"slices"
"strings"
"testing"
@ -547,6 +548,7 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
},
},
{
// no python binary, but we find libpython, which is surfaced as primary evidence
logicalFixture: "python-shared-lib/3.7.4/linux-amd64",
expected: pkg.Package{
Name: "python",
@ -556,7 +558,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Metadata: metadata("python-binary-lib"),
},
},
{
// note: dynamic (non-snippet) test case
logicalFixture: "python-slim-shared-libs/3.11/linux-amd64",
@ -569,7 +570,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3.11"),
match("python-binary", "libpython3.11.so.1.0"),
match("python-binary-lib", "libpython3.11.so.1.0"),
},
},
},
@ -586,7 +586,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3.9"),
match("python-binary", "libpython3.9.so.1.0"),
match("python-binary-lib", "libpython3.9.so.1.0"),
},
},
},
@ -618,7 +617,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3.4"),
match("python-binary", "libpython3.4m.so.1.0"),
match("python-binary-lib", "libpython3.4m.so.1.0"),
},
},
},
@ -734,8 +732,8 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Name: "go",
Version: "1.15",
PURL: "pkg:generic/go@1.15",
Locations: locations("VERSION"),
Metadata: metadata("go-binary-hint"),
Locations: locations("bin/go", "VERSION"),
Metadata: metadata("go-binary"),
},
},
{
@ -745,8 +743,8 @@ func Test_Cataloger_PositiveCases(t *testing.T) {
Name: "go",
Version: "1.25-d524e1e",
PURL: "pkg:generic/go@1.25-d524e1e",
Locations: locations("VERSION.cache"),
Metadata: metadata("go-binary-hint"),
Locations: locations("bin/go", "VERSION.cache"),
Metadata: metadata("go-binary"),
},
},
{
@ -1823,17 +1821,6 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) {
require.NoError(t, err)
for _, p := range packages {
expectedLocations := test.expected.Locations.ToSlice()
gotLocations := p.Locations.ToSlice()
require.Len(t, gotLocations, len(expectedLocations))
for i, expectedLocation := range expectedLocations {
gotLocation := gotLocations[i]
if expectedLocation.RealPath != gotLocation.RealPath {
t.Fatalf("locations do not match; expected: %v got: %v", expectedLocations, gotLocations)
}
}
assertPackagesAreEqual(t, test.expected, p)
}
})
@ -2023,12 +2010,13 @@ func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) {
gotLocations := p.Locations.ToSlice()
if len(expectedLocations) != len(gotLocations) {
failMessages = append(failMessages, "locations are not equal length")
failMessages = append(failMessages, fmt.Sprintf("locations are not equal: %v != %v", expectedLocations, gotLocations))
} else {
for i, expectedLocation := range expectedLocations {
gotLocation := gotLocations[i]
if expectedLocation.RealPath != gotLocation.RealPath {
failMessages = append(failMessages, fmt.Sprintf("locations do not match; expected: %v got: %v", expectedLocation.RealPath, gotLocation.RealPath))
for _, expectedLocation := range expectedLocations {
if !slices.ContainsFunc(gotLocations, func(gotLocation file.Location) bool {
return gotLocation.RealPath == expectedLocation.RealPath
}) {
failMessages = append(failMessages, fmt.Sprintf("location not found; expected: %v in set: %v", expectedLocation.RealPath, gotLocations))
}
}
}

View File

@ -71,8 +71,14 @@ func DefaultClassifiers() []binutils.Classifier {
{
Class: "go-binary",
FileGlob: "**/go",
EvidenceMatcher: m.FileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)\x00`),
EvidenceMatcher: binutils.MatchAny(
m.FileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)\x00`),
binutils.SupportingEvidenceMatcher("../VERSION*",
m.FileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+|-[_0-9a-z]+)?)\s`),
),
),
Package: "go",
PURL: mustPURL("pkg:generic/go@version"),
CPEs: singleCPE("cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource),
@ -142,15 +148,6 @@ func DefaultClassifiers() []binutils.Classifier {
PURL: mustPURL("pkg:generic/node@version"),
CPEs: singleCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource),
},
{
Class: "go-binary-hint",
FileGlob: "**/VERSION*",
EvidenceMatcher: m.FileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?(-[0-9a-f]{7})?)`),
Package: "go",
PURL: mustPURL("pkg:generic/go@version"),
CPEs: singleCPE("cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource),
},
{
Class: "busybox-binary",
FileGlob: "**/busybox",

View File

@ -0,0 +1 @@
no version in this binary

View File

@ -0,0 +1 @@
no version in this binary

View File

@ -10,7 +10,7 @@ import (
)
func Test_BranchingMatcher(t *testing.T) {
matchingTest := FileContentsVersionMatcher("", `my-verison:(?<version>\d+\.\d+)`)
matchingTest := FileContentsVersionMatcher("", `my-version:(?<version>\d+\.\d+)`)
notMatchingTest := MatchPath("**/not-version*")
tests := []struct {

View File

@ -9,6 +9,7 @@ import (
"fmt"
"io"
"maps"
"path"
"regexp"
"strconv"
"strings"
@ -273,9 +274,7 @@ func SharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher Evide
}
for _, p := range pkgs {
// set the source binary as the first location
locationSet := file.NewLocationSet(context.Location)
locationSet.Add(p.Locations.ToSlice()...)
p.Locations = locationSet
makePrimaryLocation(&p, context.Location)
meta, _ := p.Metadata.(pkg.BinarySignature)
p.Metadata = pkg.BinarySignature{
Matches: append([]pkg.ClassifierMatch{
@ -309,6 +308,28 @@ func MatchPath(path string) EvidenceMatcher {
}
}
// SupportingEvidenceMatcher defines an evidence matcher that searches for secondary evidence with path globs
// relative to a primary file, for example: a VERSION file in the same or a parent directory to another binary
func SupportingEvidenceMatcher(relativePathGlob string, evidenceMatcher EvidenceMatcher) EvidenceMatcher {
return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) {
f := path.Dir(context.Location.RealPath)
f = path.Join(f, relativePathGlob)
f = path.Clean(f)
// this would ideally be RelativeFileByPath but with a glob search:
relativeFiles, err := context.Resolver.FilesByGlob(f)
if err != nil {
return nil, err
}
for _, relativeFile := range relativeFiles {
evidence, err := collectSupportingEvidence(classifier, context, relativeFile, evidenceMatcher)
if evidence != nil || err != nil {
return evidence, err
}
}
return nil, nil
}
}
func getReader(context MatcherContext) (unionreader.UnionReader, error) {
if context.GetReader != nil {
return context.GetReader(context)
@ -368,3 +389,43 @@ func sharedLibraries(context MatcherContext) ([]string, error) {
return nil, nil
}
func makePrimaryLocation(p *pkg.Package, primaryLocation file.Location) {
locationSet := file.NewLocationSet(primaryLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
for _, l := range p.Locations.ToSlice() {
if locationSet.Contains(l) { // no need for duplicate locations
continue
}
locationSet.Add(l.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
}
p.Locations = locationSet
}
func collectSupportingEvidence(classifier Classifier, context MatcherContext, relativeFile file.Location, evidenceMatcher EvidenceMatcher) ([]pkg.Package, error) {
rdr, err := context.Resolver.FileContentsByLocation(relativeFile)
if err != nil {
return nil, err
}
defer internal.CloseAndLogError(rdr, relativeFile.Path())
ur, err := unionreader.GetUnionReader(rdr)
if err != nil {
return nil, err
}
newContext := MatcherContext{
Resolver: context.Resolver,
Location: relativeFile,
GetReader: func(_ MatcherContext) (unionreader.UnionReader, error) {
return ur, nil
},
}
packages, err := evidenceMatcher(classifier, newContext)
if err != nil {
return nil, err
}
for i := range packages {
p := &(packages[i])
// relative files are supporting evidence, like a VERSION file near a go binary, mark the results as supporting
makePrimaryLocation(p, context.Location)
}
return packages, nil
}

View File

@ -12,6 +12,8 @@ import (
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/directorysource"
)
func Test_ClassifierCPEs(t *testing.T) {
@ -27,7 +29,7 @@ func Test_ClassifierCPEs(t *testing.T) {
classifier: Classifier{
Package: "some-app",
FileGlob: "**/version.txt",
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P<version>[0-9.]+)`),
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P<version>[0-9.]+)`),
CPEs: []cpe.CPE{},
},
cpes: nil,
@ -38,7 +40,7 @@ func Test_ClassifierCPEs(t *testing.T) {
classifier: Classifier{
Package: "some-app",
FileGlob: "**/version.txt",
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P<version>[0-9.]+)`),
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P<version>[0-9.]+)`),
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*", cpe.GeneratedSource),
},
@ -53,7 +55,7 @@ func Test_ClassifierCPEs(t *testing.T) {
classifier: Classifier{
Package: "some-app",
FileGlob: "**/version.txt",
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P<version>[0-9.]+)`),
EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P<version>[0-9.]+)`),
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*", cpe.GeneratedSource),
cpe.Must("cpe:2.3:a:some:apps:*:*:*:*:*:*:*:*", cpe.GeneratedSource),
@ -183,3 +185,61 @@ func TestFileContentsVersionMatcher(t *testing.T) {
})
}
}
func Test_SupportingEvidenceMatcher(t *testing.T) {
tests := []struct {
name string
classifier Classifier
expected string
}{
{
name: "simple version string regexp",
classifier: Classifier{
FileGlob: "**/some-binary",
EvidenceMatcher: SupportingEvidenceMatcher("../version.txt",
FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P<version>[0-9.]+)`)),
Package: "some-binary",
},
expected: "1.8",
},
{
name: "not matching version string regexp",
classifier: Classifier{
FileGlob: "**/some-binary",
EvidenceMatcher: SupportingEvidenceMatcher("../version.txt",
FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P<version>abdd)`)),
Package: "some-binary",
},
expected: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s, err := directorysource.NewFromPath("test-fixtures")
require.NoError(t, err)
r, err := s.FileResolver(source.AllLayersScope)
require.NoError(t, err)
results, err := r.FilesByGlob(tt.classifier.FileGlob)
require.NoError(t, err)
for _, result := range results {
got, err := tt.classifier.EvidenceMatcher(tt.classifier, MatcherContext{
Resolver: r,
Location: result,
GetReader: func(ctx MatcherContext) (unionreader.UnionReader, error) {
return getReader(ctx)
},
})
require.NoError(t, err)
if tt.expected != "" {
require.NotEmpty(t, got)
require.Equal(t, tt.expected, got[0].Version)
} else {
require.Empty(t, got)
}
}
})
}
}

View File

@ -0,0 +1 @@
a binary

View File

@ -1 +1 @@
my-verison:1.8
my-version:1.8