diff --git a/syft/pkg/cataloger/common/cpe/field_candidate.go b/syft/pkg/cataloger/common/cpe/field_candidate.go index 5c4f64a97..db3efb93f 100644 --- a/syft/pkg/cataloger/common/cpe/field_candidate.go +++ b/syft/pkg/cataloger/common/cpe/field_candidate.go @@ -16,7 +16,7 @@ type fieldCandidate struct { type fieldCandidateSet map[fieldCandidate]struct{} -func newFieldCandidateFromSets(sets ...fieldCandidateSet) fieldCandidateSet { +func newFieldCandidateSetFromSets(sets ...fieldCandidateSet) fieldCandidateSet { s := newFieldCandidateSet() for _, set := range sets { s.add(set.list()...) @@ -48,10 +48,15 @@ func (s fieldCandidateSet) add(candidates ...fieldCandidate) { func (s fieldCandidateSet) removeByValue(values ...string) { for _, value := range values { - for candidate := range s { - if candidate.value == value { - delete(s, candidate) - } + s.removeWhere(valueEquals(value)) + } +} + +// removeWhere removes all entries from the fieldCandidateSet for which the condition function returns true. +func (s fieldCandidateSet) removeWhere(condition fieldCandidateCondition) { + for candidate := range s { + if condition(candidate) { + delete(s, candidate) } } } @@ -68,26 +73,29 @@ func (s fieldCandidateSet) union(others ...fieldCandidateSet) { } } -func (s fieldCandidateSet) list(filters ...filterFieldCandidateFn) (results []fieldCandidate) { -candidateLoop: +func (s fieldCandidateSet) list() (results []fieldCandidate) { for c := range s { - for _, fn := range filters { - if fn(c) { - continue candidateLoop - } - } results = append(results, c) } + return results } -func (s fieldCandidateSet) values(filters ...filterFieldCandidateFn) (results []string) { - for _, c := range s.list(filters...) { +func (s fieldCandidateSet) values() (results []string) { + for _, c := range s.list() { results = append(results, c.value) } + return results } -func (s fieldCandidateSet) uniqueValues(filters ...filterFieldCandidateFn) []string { - return strset.New(s.values(filters...)...).List() +func (s fieldCandidateSet) uniqueValues() []string { + return strset.New(s.values()...).List() +} + +func (s fieldCandidateSet) copy() fieldCandidateSet { + newSet := newFieldCandidateSet() + newSet.add(s.list()...) + + return newSet } diff --git a/syft/pkg/cataloger/common/cpe/field_candidate_filter.go b/syft/pkg/cataloger/common/cpe/field_candidate_filter.go index 58633d483..203d0e7fc 100644 --- a/syft/pkg/cataloger/common/cpe/field_candidate_filter.go +++ b/syft/pkg/cataloger/common/cpe/field_candidate_filter.go @@ -1,12 +1,18 @@ package cpe -// filterFieldCandidateFn instances should return true if the given fieldCandidate should be removed from a collection -type filterFieldCandidateFn func(fieldCandidate) bool +// A fieldCandidateCondition returns true if the condition is true for a given fieldCandidate. +type fieldCandidateCondition func(fieldCandidate) bool -func filterOutBySubselection(c fieldCandidate) bool { +func subSelectionsDisallowed(c fieldCandidate) bool { return c.disallowSubSelections } -func filterOutByDelimiterVariations(c fieldCandidate) bool { +func delimiterVariationsDisallowed(c fieldCandidate) bool { return c.disallowDelimiterVariations } + +func valueEquals(v string) fieldCandidateCondition { + return func(candidate fieldCandidate) bool { + return candidate.value == v + } +} diff --git a/syft/pkg/cataloger/common/cpe/field_candidate_test.go b/syft/pkg/cataloger/common/cpe/field_candidate_test.go index 3ad001a49..b091cfa87 100644 --- a/syft/pkg/cataloger/common/cpe/field_candidate_test.go +++ b/syft/pkg/cataloger/common/cpe/field_candidate_test.go @@ -1,6 +1,7 @@ package cpe import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -8,10 +9,10 @@ import ( func Test_cpeCandidateValues_filter(t *testing.T) { tests := []struct { - name string - input []fieldCandidate - filters []filterFieldCandidateFn - expect []string + name string + input []fieldCandidate + exclusionConditions []fieldCandidateCondition + expect []string }{ { name: "gocase", @@ -60,8 +61,8 @@ func Test_cpeCandidateValues_filter(t *testing.T) { disallowDelimiterVariations: true, }, }, - filters: []filterFieldCandidateFn{ - filterOutBySubselection, + exclusionConditions: []fieldCandidateCondition{ + subSelectionsDisallowed, }, expect: []string{ "allow anything", @@ -88,8 +89,8 @@ func Test_cpeCandidateValues_filter(t *testing.T) { disallowDelimiterVariations: true, }, }, - filters: []filterFieldCandidateFn{ - filterOutByDelimiterVariations, + exclusionConditions: []fieldCandidateCondition{ + delimiterVariationsDisallowed, }, expect: []string{ "allow anything", @@ -97,7 +98,7 @@ func Test_cpeCandidateValues_filter(t *testing.T) { }, }, { - name: "all filters", + name: "all exclusionConditions", input: []fieldCandidate{ { value: "allow anything", @@ -116,9 +117,9 @@ func Test_cpeCandidateValues_filter(t *testing.T) { disallowDelimiterVariations: true, }, }, - filters: []filterFieldCandidateFn{ - filterOutByDelimiterVariations, - filterOutBySubselection, + exclusionConditions: []fieldCandidateCondition{ + delimiterVariationsDisallowed, + subSelectionsDisallowed, }, expect: []string{ "allow anything", @@ -130,7 +131,12 @@ func Test_cpeCandidateValues_filter(t *testing.T) { t.Run(test.name, func(t *testing.T) { set := newFieldCandidateSet() set.add(test.input...) - assert.ElementsMatch(t, test.expect, set.values(test.filters...)) + + for _, condition := range test.exclusionConditions { + set.removeWhere(condition) + } + + assert.ElementsMatch(t, test.expect, set.values()) }) } } @@ -264,6 +270,7 @@ func Test_cpeFieldCandidateSet_uniqueValues(t *testing.T) { func Test_cpeFieldCandidateSet_removeByValue(t *testing.T) { s := newFieldCandidateSet() + // should be removed s.add(fieldCandidate{ value: "1", @@ -281,13 +288,47 @@ func Test_cpeFieldCandidateSet_removeByValue(t *testing.T) { s.add(fieldCandidate{ value: "1", }) + // should not be removed s.add(fieldCandidate{ value: "2", }) + assert.Len(t, s.values(), 5) s.removeByValue("1") assert.Len(t, s.values(), 1) } + +func Test_cpeFieldCandidateSet_removeByCondition(t *testing.T) { + s := newFieldCandidateSet() + + // should be removed + s.add(fieldCandidate{ + value: "1", + disallowSubSelections: true, + }) + s.add(fieldCandidate{ + value: "hello-world", + }) + + // should not be removed + s.add(fieldCandidate{ + value: "2", + }) + + assert.Len(t, s.values(), 3) + + s.removeWhere(func(candidate fieldCandidate) bool { + return candidate.disallowSubSelections == true + }) + + assert.Len(t, s.values(), 2) + + s.removeWhere(func(candidate fieldCandidate) bool { + return strings.Contains(candidate.value, "-") + }) + + assert.Len(t, s.values(), 1) +} diff --git a/syft/pkg/cataloger/common/cpe/filter.go b/syft/pkg/cataloger/common/cpe/filter.go index 14972a23d..cd08fa74b 100644 --- a/syft/pkg/cataloger/common/cpe/filter.go +++ b/syft/pkg/cataloger/common/cpe/filter.go @@ -16,6 +16,7 @@ var cpeFilters = []filterFn{ disallowJiraClientServerMismatch, disallowJenkinsServerCPEForPluginPackage, disallowJenkinsCPEsNotAssociatedWithJenkins, + disallowNonParseableCPEs, } func filter(cpes []pkg.CPE, p pkg.Package, filters ...filterFn) (result []pkg.CPE) { @@ -32,6 +33,15 @@ cpeLoop: return result } +func disallowNonParseableCPEs(cpe pkg.CPE, _ pkg.Package) bool { + v := cpe.BindToFmtString() + _, err := pkg.NewCPE(v) + + cannotParse := err != nil + + return cannotParse +} + // jenkins plugins should not match against jenkins func disallowJenkinsServerCPEForPluginPackage(cpe pkg.CPE, p pkg.Package) bool { if p.Type == pkg.JenkinsPluginPkg && cpe.Product == jenkinsName { diff --git a/syft/pkg/cataloger/common/cpe/generate.go b/syft/pkg/cataloger/common/cpe/generate.go index 7b82fd519..4f66e1588 100644 --- a/syft/pkg/cataloger/common/cpe/generate.go +++ b/syft/pkg/cataloger/common/cpe/generate.go @@ -162,9 +162,12 @@ func candidateProducts(p pkg.Package) []string { return append(productCandidatesByPkgType.getCandidates(p.Type, p.Name), products.uniqueValues()...) } -func addAllSubSelections(set fieldCandidateSet) { - for _, candidate := range set.values(filterOutBySubselection) { - set.addValue(generateSubSelections(candidate)...) +func addAllSubSelections(fields fieldCandidateSet) { + candidatesForVariations := fields.copy() + candidatesForVariations.removeWhere(subSelectionsDisallowed) + + for _, candidate := range candidatesForVariations.values() { + fields.addValue(generateSubSelections(candidate)...) } } @@ -226,7 +229,10 @@ func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byt } func addDelimiterVariations(fields fieldCandidateSet) { - for _, candidate := range fields.list(filterOutByDelimiterVariations) { + candidatesForVariations := fields.copy() + candidatesForVariations.removeWhere(delimiterVariationsDisallowed) + + for _, candidate := range candidatesForVariations.list() { field := candidate.value hasHyphen := strings.Contains(field, "-") hasUnderscore := strings.Contains(field, "_") diff --git a/syft/pkg/cataloger/common/cpe/generate_test.go b/syft/pkg/cataloger/common/cpe/generate_test.go index f357e3e99..450e1b133 100644 --- a/syft/pkg/cataloger/common/cpe/generate_test.go +++ b/syft/pkg/cataloger/common/cpe/generate_test.go @@ -179,6 +179,43 @@ func TestGeneratePackageCPEs(t *testing.T) { "cpe:2.3:a:sonatype:nexus:3.2:*:*:*:*:*:*:*", }, }, + { + name: "java with URL in metadata", // regression: https://github.com/anchore/grype/issues/417 + p: pkg.Package{ + Name: "wstx-asl", + Version: "3.2.7", + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, + Metadata: pkg.JavaMetadata{ + Manifest: &pkg.JavaManifest{ + Main: map[string]string{ + "Ant-Version": "Apache Ant 1.6.5", + "Built-By": "tatu", + "Created-By": "1.4.2_03-b02 (Sun Microsystems Inc.)", + "Implementation-Title": "WoodSToX XML-processor", + "Implementation-Vendor": "woodstox.codehaus.org", + "Implementation-Version": "3.2.7", + "Manifest-Version": "1.0", + "Specification-Title": "StAX 1.0 API", + "Specification-Vendor": "http://jcp.org/en/jsr/detail?id=173", + "Specification-Version": "1.0", + }, + }, + }, + }, + expected: []string{ + "cpe:2.3:a:woodstox_codehaus_org:wstx-asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:woodstox_codehaus_org:wstx_asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:woodstox-codehaus-org:wstx_asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:woodstox-codehaus-org:wstx-asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx_asl:wstx-asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx-asl:wstx-asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx-asl:wstx_asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx_asl:wstx_asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx:wstx_asl:3.2.7:*:*:*:*:*:*:*", + "cpe:2.3:a:wstx:wstx-asl:3.2.7:*:*:*:*:*:*:*", + }, + }, { name: "jenkins package identified via pkg type", p: pkg.Package{ @@ -488,7 +525,7 @@ func TestGeneratePackageCPEs(t *testing.T) { actualCpeSet.Add(a.BindToFmtString()) } - extra := strset.Difference(expectedCpeSet, actualCpeSet).List() + extra := strset.Difference(actualCpeSet, expectedCpeSet).List() sort.Strings(extra) if len(extra) > 0 { t.Errorf("found extra CPEs:") @@ -497,7 +534,7 @@ func TestGeneratePackageCPEs(t *testing.T) { fmt.Printf(" %q,\n", d) } - missing := strset.Difference(actualCpeSet, expectedCpeSet).List() + missing := strset.Difference(expectedCpeSet, actualCpeSet).List() sort.Strings(missing) if len(missing) > 0 { t.Errorf("missing CPEs:") diff --git a/syft/pkg/cataloger/common/cpe/java.go b/syft/pkg/cataloger/common/cpe/java.go index 67740b92a..0c061d7cd 100644 --- a/syft/pkg/cataloger/common/cpe/java.go +++ b/syft/pkg/cataloger/common/cpe/java.go @@ -46,7 +46,7 @@ func candidateProductsForJava(p pkg.Package) []string { func candidateVendorsForJava(p pkg.Package) fieldCandidateSet { gidVendors := vendorsFromGroupIDs(groupIDsFromJavaPackage(p)) nameVendors := vendorsFromJavaManifestNames(p) - return newFieldCandidateFromSets(gidVendors, nameVendors) + return newFieldCandidateSetFromSets(gidVendors, nameVendors) } func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {