diff --git a/syft/pkg/cataloger/internal/cpegenerate/generate.go b/syft/pkg/cataloger/internal/cpegenerate/generate.go index 3a8e7ac9d..1ccbf5e25 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/generate.go +++ b/syft/pkg/cataloger/internal/cpegenerate/generate.go @@ -6,9 +6,11 @@ import ( _ "embed" "encoding/json" "fmt" + "regexp" "sort" "strings" "sync" + "unicode" "github.com/scylladb/go-set/strset" @@ -228,6 +230,11 @@ func candidateVendors(p pkg.Package) []string { vendors.union(candidateVendorsForWordpressPlugin(p)) } + if p.Type == pkg.BinaryPkg && endsWithNumber(p.Name) { + // add binary package digit-suffix variations (e.g. Qt5 -> Qt) + addBinaryPackageDigitVariations(vendors) + } + // We should no longer be generating vendor candidates with these values ["" and "*"] // (since CPEs will match any other value) vendors.removeByValue("") @@ -286,6 +293,9 @@ func candidateProductSet(p pkg.Package) fieldCandidateSet { if prod != "" { products.addValue(prod) } + case p.Type == pkg.BinaryPkg && endsWithNumber(p.Name): + // add binary package digit-suffix variations (e.g. Qt5 -> Qt) + addBinaryPackageDigitVariations(products) } switch p.Metadata.(type) {
@@ -404,3 +414,40 @@ func addDelimiterVariations(fields fieldCandidateSet) {
 		}
 	}
 }
+
+// trailingDigitsPattern matches a run of digits (0-9) at the end of a string. It is
+// compiled once at package scope so removeTrailingDigits does not recompile it per call.
+var trailingDigitsPattern = regexp.MustCompile(`\d+$`)
+
+// removeTrailingDigits returns s with all trailing digits removed; a string that
+// does not end in a digit is returned unchanged.
+func removeTrailingDigits(s string) string {
+	return trailingDigitsPattern.ReplaceAllString(s, "")
+}
+
+// addBinaryPackageDigitVariations adds variations with trailing digits removed for binary packages.
+// For every candidate that ends with a digit, a new variation with all suffix-digits removed is
+// added (e.g. Qt5 -> Qt). This helps generate additional CPE permutations for better vulnerability
+// matching. Callers apply it only to binary packages whose name ends with a digit.
+func addBinaryPackageDigitVariations(fields fieldCandidateSet) {
+	// Iterate over a copy of the set, since fields itself is added to inside the loop.
+	candidatesForVariations := fields.copy()
+	for _, candidate := range candidatesForVariations.values() {
+		// removeTrailingDigits returns its input unchanged when there is no digit suffix,
+		// so comparing against the input also covers the "ends with a digit" check.
+		withoutDigits := removeTrailingDigits(candidate)
+		// The empty-string guard skips candidates that consist solely of digits.
+		if withoutDigits != "" && withoutDigits != candidate {
+			fields.addValue(withoutDigits)
+		}
+	}
+}
+
+// endsWithNumber reports whether the last rune of s is a digit according to unicode.IsDigit;
+// it returns false for the empty string. Note that this is broader than the ASCII-only \d
+// used by removeTrailingDigits, so a non-ASCII trailing digit produces no variation.
+func endsWithNumber(s string) bool {
+	// TrimRightFunc shortens s only when its final rune is a digit, so a length
+	// comparison answers the question without building a []rune for the whole string.
+	return len(strings.TrimRightFunc(s, unicode.IsDigit)) < len(s)
+}
diff --git a/syft/pkg/cataloger/internal/cpegenerate/generate_test.go b/syft/pkg/cataloger/internal/cpegenerate/generate_test.go
index d78baea1b..3323fbc6c 100644
--- a/syft/pkg/cataloger/internal/cpegenerate/generate_test.go
+++ b/syft/pkg/cataloger/internal/cpegenerate/generate_test.go
@@ -1145,3 +1145,67 @@ func TestDictionaryFindIsWired(t *testing.T) {
 		})
 	}
 }
+
+// TestAddBinaryPackageDigitVariations tests the heuristic for binary package types
+// where names ending with digits get variations with all suffix-digits removed (e.g. Qt5 -> Qt).
+// This improves vulnerability matching for binary packages like Qt6, libfoo123, etc.
+func TestAddBinaryPackageDigitVariations(t *testing.T) {
+	// The helper only inspects candidate values; callers apply the pkg.BinaryPkg gate.
+	tests := []struct {
+		name            string
+		inputCandidates []string
+		expectedPresent []string // These should be present in the result
+		expectedAbsent  []string // These should NOT be present in the result
+	}{
+		{
+			name:            "Qt5 binary package example",
+			inputCandidates: []string{"Qt5"},
+			expectedPresent: []string{"Qt5", "Qt"},
+		},
+		{
+			name:            "package with trailing digits",
+			inputCandidates: []string{"Qt5", "libfoo123", "bar42", "baz"},
+			expectedPresent: []string{"Qt5", "Qt", "libfoo123", "libfoo", "bar42", "bar", "baz"},
+		},
+		{
+			name:            "multiple trailing digits",
+			inputCandidates: []string{"Qt872", "package999"},
+			expectedPresent: []string{"Qt872", "Qt", "package999", "package"},
+		},
+		{
+			name:            "package without trailing digits",
+			inputCandidates: []string{"QtCore", "libfoo", "bar"},
+			expectedPresent: []string{"QtCore", "libfoo", "bar"},
+			expectedAbsent:  []string{"QtCor", "libfo", "ba"},
+		},
+		{
+			name:            "only the trailing run of digits is removed",
+			inputCandidates: []string{"lib2foo3"},
+			expectedPresent: []string{"lib2foo3", "lib2foo"},
+			expectedAbsent:  []string{"libfoo", "lib"},
+		},
+		{
+			name:            "digits-only candidate does not yield an empty value",
+			inputCandidates: []string{"123"},
+			expectedPresent: []string{"123"},
+			expectedAbsent:  []string{""},
+		},
+		{name: "empty candidate set"},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			fields := newFieldCandidateSet(test.inputCandidates...)
+			addBinaryPackageDigitVariations(fields)
+			values := fields.uniqueValues()
+
+			for _, expected := range test.expectedPresent {
+				assert.Contains(t, values, expected, "expected %q to be present", expected)
+			}
+
+			for _, notExpected := range test.expectedAbsent {
+				assert.NotContains(t, values, notExpected, "expected %q to be absent", notExpected)
+			}
+		})
+	}
+}