From 836f358cd4c413be291c5e4e15a96e93e1739e72 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Fri, 23 Jan 2026 13:01:12 -0500 Subject: [PATCH] fix: correctly report supporting evidence for binary packages (#4558) Signed-off-by: Keith Zantow --- syft/pkg/cataloger/binary/capabilities.yaml | 10 --- .../cataloger/binary/classifier_cataloger.go | 26 ++++++- .../binary/classifier_cataloger_test.go | 36 ++++------ syft/pkg/cataloger/binary/classifiers.go | 19 +++--- .../snippets/go-version-hint/1.15/any/VERSION | 2 +- .../snippets/go-version-hint/1.15/any/bin/go | 1 + .../snippets/go-version-hint/1.25/any/bin/go | 1 + .../binutils/branching_matcher_test.go | 2 +- .../cataloger/internal/binutils/classifier.go | 67 ++++++++++++++++++- .../internal/binutils/classifier_test.go | 66 +++++++++++++++++- .../binutils/test-fixtures/subdir/some-binary | 1 + .../binutils/test-fixtures/version.txt | 2 +- 12 files changed, 177 insertions(+), 56 deletions(-) create mode 100644 syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/bin/go create mode 100644 syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.25/any/bin/go create mode 100644 syft/pkg/cataloger/internal/binutils/test-fixtures/subdir/some-binary diff --git a/syft/pkg/cataloger/binary/capabilities.yaml b/syft/pkg/cataloger/binary/capabilities.yaml index b55e0f082..0b3a36e51 100644 --- a/syft/pkg/cataloger/binary/capabilities.yaml +++ b/syft/pkg/cataloger/binary/capabilities.yaml @@ -109,16 +109,6 @@ catalogers: cpes: - cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:* type: BinaryPkg - - method: glob - criteria: - - '**/VERSION*' - packages: - - class: go-binary-hint - name: go - purl: pkg:generic/go - cpes: - - cpe:2.3:a:golang:go:*:*:*:*:*:*:*:* - type: BinaryPkg - method: glob criteria: - '**/busybox' diff --git a/syft/pkg/cataloger/binary/classifier_cataloger.go b/syft/pkg/cataloger/binary/classifier_cataloger.go index 4587a338c..3b5ba3146 100644 --- a/syft/pkg/cataloger/binary/classifier_cataloger.go +++ b/syft/pkg/cataloger/binary/classifier_cataloger.go @@ -64,6 +64,10 @@ func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Pac var relationships []artifact.Relationship var errs error + // we do not run these classifiers in parallel currently because: when determining primary vs. supporting evidence, + // we take preference to the classifier that was defined first, if we modify this to run in parallel, + // we need to retain this behavior by giving precedence to the classifier defined first in the c.classifiers list; + // if this is ever made parallel, we will need to account for this to have deterministic behavior for _, cls := range c.classifiers { log.WithFields("classifier", cls.Class).Trace("cataloging binaries") newPkgs, err := catalog(resolver, cls) @@ -101,8 +105,26 @@ func mergePackages(target *pkg.Package, extra *pkg.Package) { if extra.Type != pkg.BinaryPkg && target.Type == pkg.BinaryPkg { target.Type = extra.Type } - // add the locations - target.Locations.Add(extra.Locations.ToSlice()...) + addedEvidence := false + // when merging locations together, we need to maintain primary vs. supporting evidence - + // if we are merging two packages together that have overlapping evidence, e.g. libpython + // which are found by 2 different classifiers, we want to deduplicate evidence for the same + // locations when merging so libpython is not considered primary evidence. This allows cases + // where we find python binary with version info coming from libpython to deduplicate the libpython + // entries, but also surface results for libpython separately as primary evidence when there is + // no python binary referencing it + for _, location := range extra.Locations.ToSlice() { + // if we already have the same location, don't include duplicates + if target.Locations.Contains(location) { + continue + } + addedEvidence = true + target.Locations.Add(location) + } + // only include the additional metadata if we added evidence, as it was likely duplicated e.g. libpython + if !addedEvidence { + return + } // update the metadata to indicate which classifiers were used meta, _ := target.Metadata.(pkg.BinarySignature) if m, ok := extra.Metadata.(pkg.BinarySignature); ok { diff --git a/syft/pkg/cataloger/binary/classifier_cataloger_test.go b/syft/pkg/cataloger/binary/classifier_cataloger_test.go index 061a91017..584ea30fe 100644 --- a/syft/pkg/cataloger/binary/classifier_cataloger_test.go +++ b/syft/pkg/cataloger/binary/classifier_cataloger_test.go @@ -6,6 +6,7 @@ import ( "flag" "fmt" "io" + "slices" "strings" "testing" @@ -547,6 +548,7 @@ func Test_Cataloger_PositiveCases(t *testing.T) { }, }, { + // no python binary, but we find libpython, which is surfaced as primary evidence logicalFixture: "python-shared-lib/3.7.4/linux-amd64", expected: pkg.Package{ Name: "python", @@ -556,7 +558,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Metadata: metadata("python-binary-lib"), }, }, - { // note: dynamic (non-snippet) test case logicalFixture: "python-slim-shared-libs/3.11/linux-amd64", @@ -569,7 +570,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Matches: []pkg.ClassifierMatch{ match("python-binary", "python3.11"), match("python-binary", "libpython3.11.so.1.0"), - match("python-binary-lib", "libpython3.11.so.1.0"), }, }, }, @@ -586,7 +586,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Matches: []pkg.ClassifierMatch{ match("python-binary", "python3.9"), match("python-binary", "libpython3.9.so.1.0"), - match("python-binary-lib", "libpython3.9.so.1.0"), }, }, }, @@ -618,7 +617,6 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Matches: []pkg.ClassifierMatch{ match("python-binary", "python3.4"), match("python-binary", "libpython3.4m.so.1.0"), - match("python-binary-lib", "libpython3.4m.so.1.0"), }, }, }, @@ -734,8 +732,8 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Name: "go", Version: "1.15", PURL: "pkg:generic/go@1.15", - Locations: locations("VERSION"), - Metadata: metadata("go-binary-hint"), + Locations: locations("bin/go", "VERSION"), + Metadata: metadata("go-binary"), }, }, { @@ -745,8 +743,8 @@ func Test_Cataloger_PositiveCases(t *testing.T) { Name: "go", Version: "1.25-d524e1e", PURL: "pkg:generic/go@1.25-d524e1e", - Locations: locations("VERSION.cache"), - Metadata: metadata("go-binary-hint"), + Locations: locations("bin/go", "VERSION.cache"), + Metadata: metadata("go-binary"), }, }, { @@ -1823,17 +1821,6 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { require.NoError(t, err) for _, p := range packages { - expectedLocations := test.expected.Locations.ToSlice() - gotLocations := p.Locations.ToSlice() - require.Len(t, gotLocations, len(expectedLocations)) - - for i, expectedLocation := range expectedLocations { - gotLocation := gotLocations[i] - if expectedLocation.RealPath != gotLocation.RealPath { - t.Fatalf("locations do not match; expected: %v got: %v", expectedLocations, gotLocations) - } - } - assertPackagesAreEqual(t, test.expected, p) } }) @@ -2023,12 +2010,13 @@ func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) { gotLocations := p.Locations.ToSlice() if len(expectedLocations) != len(gotLocations) { - failMessages = append(failMessages, "locations are not equal length") + failMessages = append(failMessages, fmt.Sprintf("locations are not equal: %v != %v", expectedLocations, gotLocations)) } else { - for i, expectedLocation := range expectedLocations { - gotLocation := gotLocations[i] - if expectedLocation.RealPath != gotLocation.RealPath { - failMessages = append(failMessages, fmt.Sprintf("locations do not match; expected: %v got: %v", expectedLocation.RealPath, gotLocation.RealPath)) + for _, expectedLocation := range expectedLocations { + if !slices.ContainsFunc(gotLocations, func(gotLocation file.Location) bool { + return gotLocation.RealPath == expectedLocation.RealPath + }) { + failMessages = append(failMessages, fmt.Sprintf("location not found; expected: %v in set: %v", expectedLocation.RealPath, gotLocations)) } } } diff --git a/syft/pkg/cataloger/binary/classifiers.go b/syft/pkg/cataloger/binary/classifiers.go index 600c1f3d4..a2b20f81f 100644 --- a/syft/pkg/cataloger/binary/classifiers.go +++ b/syft/pkg/cataloger/binary/classifiers.go @@ -71,8 +71,14 @@ func DefaultClassifiers() []binutils.Classifier { { Class: "go-binary", FileGlob: "**/go", - EvidenceMatcher: m.FileContentsVersionMatcher( - `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)\x00`), + EvidenceMatcher: binutils.MatchAny( + m.FileContentsVersionMatcher( + `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)\x00`), + binutils.SupportingEvidenceMatcher("../VERSION*", + m.FileContentsVersionMatcher( + `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+|-[_0-9a-z]+)?)\s`), + ), + ), Package: "go", PURL: mustPURL("pkg:generic/go@version"), CPEs: singleCPE("cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource), @@ -142,15 +148,6 @@ func DefaultClassifiers() []binutils.Classifier { PURL: mustPURL("pkg:generic/node@version"), CPEs: singleCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource), }, - { - Class: "go-binary-hint", - FileGlob: "**/VERSION*", - EvidenceMatcher: m.FileContentsVersionMatcher( - `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?(-[0-9a-f]{7})?)`), - Package: "go", - PURL: mustPURL("pkg:generic/go@version"), - CPEs: singleCPE("cpe:2.3:a:golang:go:*:*:*:*:*:*:*:*", cpe.NVDDictionaryLookupSource), - }, { Class: "busybox-binary", FileGlob: "**/busybox", diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/VERSION b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/VERSION index 5bedbed9f..1c595336d 100644 --- a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/VERSION +++ b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/VERSION @@ -1 +1 @@ -go1.15-beta2 \ No newline at end of file +go1.15 Fri 2003 \ No newline at end of file diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/bin/go b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/bin/go new file mode 100644 index 000000000..ae382e1e6 --- /dev/null +++ b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.15/any/bin/go @@ -0,0 +1 @@ +no version in this binary \ No newline at end of file diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.25/any/bin/go b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.25/any/bin/go new file mode 100644 index 000000000..ae382e1e6 --- /dev/null +++ b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/go-version-hint/1.25/any/bin/go @@ -0,0 +1 @@ +no version in this binary \ No newline at end of file diff --git a/syft/pkg/cataloger/internal/binutils/branching_matcher_test.go b/syft/pkg/cataloger/internal/binutils/branching_matcher_test.go index faf6a7633..b20e329e1 100644 --- a/syft/pkg/cataloger/internal/binutils/branching_matcher_test.go +++ b/syft/pkg/cataloger/internal/binutils/branching_matcher_test.go @@ -10,7 +10,7 @@ import ( ) func Test_BranchingMatcher(t *testing.T) { - matchingTest := FileContentsVersionMatcher("", `my-verison:(?\d+\.\d+)`) + matchingTest := FileContentsVersionMatcher("", `my-version:(?\d+\.\d+)`) notMatchingTest := MatchPath("**/not-version*") tests := []struct { diff --git a/syft/pkg/cataloger/internal/binutils/classifier.go b/syft/pkg/cataloger/internal/binutils/classifier.go index e01ea9666..f523d755f 100644 --- a/syft/pkg/cataloger/internal/binutils/classifier.go +++ b/syft/pkg/cataloger/internal/binutils/classifier.go @@ -9,6 +9,7 @@ import ( "fmt" "io" "maps" + "path" "regexp" "strconv" "strings" @@ -273,9 +274,7 @@ func SharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher Evide } for _, p := range pkgs { // set the source binary as the first location - locationSet := file.NewLocationSet(context.Location) - locationSet.Add(p.Locations.ToSlice()...) - p.Locations = locationSet + makePrimaryLocation(&p, context.Location) meta, _ := p.Metadata.(pkg.BinarySignature) p.Metadata = pkg.BinarySignature{ Matches: append([]pkg.ClassifierMatch{ @@ -309,6 +308,28 @@ func MatchPath(path string) EvidenceMatcher { } } +// SupportingEvidenceMatcher defines an evidence matcher that searches for secondary evidence with path globs +// relative to a primary file, for example: a VERSION file in the same or a parent directory to another binary +func SupportingEvidenceMatcher(relativePathGlob string, evidenceMatcher EvidenceMatcher) EvidenceMatcher { + return func(classifier Classifier, context MatcherContext) ([]pkg.Package, error) { + f := path.Dir(context.Location.RealPath) + f = path.Join(f, relativePathGlob) + f = path.Clean(f) + // this would ideally be RelativeFileByPath but with a glob search: + relativeFiles, err := context.Resolver.FilesByGlob(f) + if err != nil { + return nil, err + } + for _, relativeFile := range relativeFiles { + evidence, err := collectSupportingEvidence(classifier, context, relativeFile, evidenceMatcher) + if evidence != nil || err != nil { + return evidence, err + } + } + return nil, nil + } +} + func getReader(context MatcherContext) (unionreader.UnionReader, error) { if context.GetReader != nil { return context.GetReader(context) @@ -368,3 +389,43 @@ func sharedLibraries(context MatcherContext) ([]string, error) { return nil, nil } + +func makePrimaryLocation(p *pkg.Package, primaryLocation file.Location) { + locationSet := file.NewLocationSet(primaryLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)) + for _, l := range p.Locations.ToSlice() { + if locationSet.Contains(l) { // no need for duplicate locations + continue + } + locationSet.Add(l.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) + } + p.Locations = locationSet +} + +func collectSupportingEvidence(classifier Classifier, context MatcherContext, relativeFile file.Location, evidenceMatcher EvidenceMatcher) ([]pkg.Package, error) { + rdr, err := context.Resolver.FileContentsByLocation(relativeFile) + if err != nil { + return nil, err + } + defer internal.CloseAndLogError(rdr, relativeFile.Path()) + ur, err := unionreader.GetUnionReader(rdr) + if err != nil { + return nil, err + } + newContext := MatcherContext{ + Resolver: context.Resolver, + Location: relativeFile, + GetReader: func(_ MatcherContext) (unionreader.UnionReader, error) { + return ur, nil + }, + } + packages, err := evidenceMatcher(classifier, newContext) + if err != nil { + return nil, err + } + for i := range packages { + p := &(packages[i]) + // relative files are supporting evidence, like a VERSION file near a go binary, mark the results as supporting + makePrimaryLocation(p, context.Location) + } + return packages, nil +} diff --git a/syft/pkg/cataloger/internal/binutils/classifier_test.go b/syft/pkg/cataloger/internal/binutils/classifier_test.go index e0497957d..3d5ddaabe 100644 --- a/syft/pkg/cataloger/internal/binutils/classifier_test.go +++ b/syft/pkg/cataloger/internal/binutils/classifier_test.go @@ -12,6 +12,8 @@ import ( "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/internal/unionreader" + "github.com/anchore/syft/syft/source" + "github.com/anchore/syft/syft/source/directorysource" ) func Test_ClassifierCPEs(t *testing.T) { @@ -27,7 +29,7 @@ func Test_ClassifierCPEs(t *testing.T) { classifier: Classifier{ Package: "some-app", FileGlob: "**/version.txt", - EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P[0-9.]+)`), + EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P[0-9.]+)`), CPEs: []cpe.CPE{}, }, cpes: nil, @@ -38,7 +40,7 @@ func Test_ClassifierCPEs(t *testing.T) { classifier: Classifier{ Package: "some-app", FileGlob: "**/version.txt", - EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P[0-9.]+)`), + EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P[0-9.]+)`), CPEs: []cpe.CPE{ cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*", cpe.GeneratedSource), }, @@ -53,7 +55,7 @@ func Test_ClassifierCPEs(t *testing.T) { classifier: Classifier{ Package: "some-app", FileGlob: "**/version.txt", - EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-verison:(?P[0-9.]+)`), + EvidenceMatcher: FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P[0-9.]+)`), CPEs: []cpe.CPE{ cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*", cpe.GeneratedSource), cpe.Must("cpe:2.3:a:some:apps:*:*:*:*:*:*:*:*", cpe.GeneratedSource), @@ -183,3 +185,61 @@ func TestFileContentsVersionMatcher(t *testing.T) { }) } } + +func Test_SupportingEvidenceMatcher(t *testing.T) { + + tests := []struct { + name string + classifier Classifier + expected string + }{ + { + name: "simple version string regexp", + classifier: Classifier{ + FileGlob: "**/some-binary", + EvidenceMatcher: SupportingEvidenceMatcher("../version.txt", + FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?P[0-9.]+)`)), + Package: "some-binary", + }, + expected: "1.8", + }, + { + name: "not matching version string regexp", + classifier: Classifier{ + FileGlob: "**/some-binary", + EvidenceMatcher: SupportingEvidenceMatcher("../version.txt", + FileContentsVersionMatcher("cataloger-name", `(?m)my-version:(?Pabdd)`)), + Package: "some-binary", + }, + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s, err := directorysource.NewFromPath("test-fixtures") + require.NoError(t, err) + r, err := s.FileResolver(source.AllLayersScope) + require.NoError(t, err) + + results, err := r.FilesByGlob(tt.classifier.FileGlob) + require.NoError(t, err) + for _, result := range results { + got, err := tt.classifier.EvidenceMatcher(tt.classifier, MatcherContext{ + Resolver: r, + Location: result, + GetReader: func(ctx MatcherContext) (unionreader.UnionReader, error) { + return getReader(ctx) + }, + }) + require.NoError(t, err) + if tt.expected != "" { + require.NotEmpty(t, got) + require.Equal(t, tt.expected, got[0].Version) + } else { + require.Empty(t, got) + } + } + }) + } +} diff --git a/syft/pkg/cataloger/internal/binutils/test-fixtures/subdir/some-binary b/syft/pkg/cataloger/internal/binutils/test-fixtures/subdir/some-binary new file mode 100644 index 000000000..34bffea7e --- /dev/null +++ b/syft/pkg/cataloger/internal/binutils/test-fixtures/subdir/some-binary @@ -0,0 +1 @@ +a binary \ No newline at end of file diff --git a/syft/pkg/cataloger/internal/binutils/test-fixtures/version.txt b/syft/pkg/cataloger/internal/binutils/test-fixtures/version.txt index 5baa206f0..6c6b2ca63 100644 --- a/syft/pkg/cataloger/internal/binutils/test-fixtures/version.txt +++ b/syft/pkg/cataloger/internal/binutils/test-fixtures/version.txt @@ -1 +1 @@ -my-verison:1.8 \ No newline at end of file +my-version:1.8 \ No newline at end of file