From 0395c4744581a3d33bf80656f092b19dd75b32fb Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 27 Sep 2021 18:31:59 -0400 Subject: [PATCH] Add vendor + product known good CPE field values (#517) * add better infrastructure around CPE candidate additions Signed-off-by: Alex Goodman * add test cases for CPE candidate additions Signed-off-by: Alex Goodman * small ergonomic updates Signed-off-by: Christopher Angelo Phillips * clean java OSGI directives from group ID Signed-off-by: Alex Goodman * unquote CPE candidate field values (double quotes only) Signed-off-by: Alex Goodman * add remaining CPE vendor & product additions (+ tests) Signed-off-by: Alex Goodman Co-authored-by: Christopher Angelo Phillips --- .../common/cpe/candidate_by_package_type.go | 186 +++++++++++++++++- .../cpe/candidate_by_package_type_test.go | 155 +++++++++++++++ .../cataloger/common/cpe/field_candidate.go | 13 +- .../common/cpe/field_candidate_test.go | 16 ++ syft/pkg/cataloger/common/cpe/generate.go | 37 +--- .../pkg/cataloger/common/cpe/generate_test.go | 39 ++++ syft/pkg/cataloger/common/cpe/java.go | 23 ++- syft/pkg/cataloger/common/cpe/java_test.go | 4 +- 8 files changed, 428 insertions(+), 45 deletions(-) create mode 100644 syft/pkg/cataloger/common/cpe/candidate_by_package_type_test.go diff --git a/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go b/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go index ecaac6f32..bb008cbc7 100644 --- a/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go +++ b/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go @@ -1,18 +1,188 @@ package cpe -import "github.com/anchore/syft/syft/pkg" +import ( + "github.com/anchore/syft/syft/pkg" +) -// this is a static mapping of known package names (keys) to official cpe names for each package -type candidatesByPackageType map[pkg.Type]map[string][]string +// candidateComposite is a convenience when creating the defaultCandidateAdditions set +type candidateComposite struct { + pkg.Type + candidateKey + candidateAddition +} -func (s candidatesByPackageType) getCandidates(t pkg.Type, key string) []string { - if _, ok := s[t]; !ok { - return nil +// defaultCandidateAdditions is all of the known cases for product and vendor field values that should be used when +// select package information is discovered +var defaultCandidateAdditions = buildCandidateLookup( + []candidateComposite{ + // Java packages + { + pkg.JavaPkg, + candidateKey{PkgName: "springframework"}, + candidateAddition{AdditionalProducts: []string{"spring_framework", "springsource_spring_framework"}}, + }, + { + pkg.JavaPkg, + candidateKey{PkgName: "spring-core"}, + candidateAddition{AdditionalProducts: []string{"spring_framework", "springsource_spring_framework"}}, + }, + { + // example image: docker.io/nuxeo:latest + pkg.JavaPkg, + candidateKey{PkgName: "elasticsearch"}, //, Vendor: "elasticsearch"}, + candidateAddition{AdditionalVendors: []string{"elastic"}}, + }, + { + // example image: docker.io/kaazing-gateway:latest + pkg.JavaPkg, + candidateKey{PkgName: "log4j"}, //, Vendor: "apache-software-foundation"}, + candidateAddition{AdditionalVendors: []string{"apache"}}, + }, + + { + // example image: cassandra:latest + pkg.JavaPkg, + candidateKey{PkgName: "apache-cassandra"}, //, Vendor: "apache"}, + candidateAddition{AdditionalProducts: []string{"cassandra"}}, + }, + + // NPM packages + { + pkg.NpmPkg, + candidateKey{PkgName: "hapi"}, + candidateAddition{AdditionalProducts: []string{"hapi_server_framework"}}, + }, + { + pkg.NpmPkg, + candidateKey{PkgName: "handlebars.js"}, + candidateAddition{AdditionalProducts: []string{"handlebars"}}, + }, + { + pkg.NpmPkg, + candidateKey{PkgName: "is-my-json-valid"}, + candidateAddition{AdditionalProducts: []string{"is_my_json_valid"}}, + }, + { + pkg.NpmPkg, + candidateKey{PkgName: "mustache"}, + candidateAddition{AdditionalProducts: []string{"mustache.js"}}, + }, + + // Gem packages + { + pkg.GemPkg, + candidateKey{PkgName: "Arabic-Prawn"}, + candidateAddition{AdditionalProducts: []string{"arabic_prawn"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "bio-basespace-sdk"}, + candidateAddition{AdditionalProducts: []string{"basespace_ruby_sdk"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "cremefraiche"}, + candidateAddition{AdditionalProducts: []string{"creme_fraiche"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "html-sanitizer"}, + candidateAddition{AdditionalProducts: []string{"html_sanitizer"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "sentry-raven"}, + candidateAddition{AdditionalProducts: []string{"raven-ruby"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "RedCloth"}, + candidateAddition{AdditionalProducts: []string{"redcloth_library"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "VladTheEnterprising"}, + candidateAddition{AdditionalProducts: []string{"vladtheenterprising"}}, + }, + { + pkg.GemPkg, + candidateKey{PkgName: "yajl-ruby"}, + candidateAddition{AdditionalProducts: []string{"yajl-ruby_gem"}}, + }, + // Python packages + { + pkg.PythonPkg, + candidateKey{PkgName: "python-rrdtool"}, + candidateAddition{AdditionalProducts: []string{"rrdtool"}}, + }, + }) + +// buildCandidateLookup is a convenience function for creating the defaultCandidateAdditions set +func buildCandidateLookup(cc []candidateComposite) (ca map[pkg.Type]map[candidateKey]candidateAddition) { + ca = make(map[pkg.Type]map[candidateKey]candidateAddition) + for _, c := range cc { + if _, ok := ca[c.Type]; !ok { + ca[c.Type] = make(map[candidateKey]candidateAddition) + } + ca[c.Type][c.candidateKey] = c.candidateAddition } - value, ok := s[t][key] + + return ca +} + +// candidateKey represents the set of inputs that should be matched on in order to signal more candidate additions to be used. +type candidateKey struct { + Vendor string + PkgName string +} + +// candidateAddition are the specific additions that should be considered during CPE generation (given a specific candidateKey) +type candidateAddition struct { + AdditionalProducts []string + AdditionalVendors []string +} + +// findAdditionalVendors searches all possible vendor additions that could be added during the CPE generation process (given package info + a vendor candidate) +func findAdditionalVendors(allAdditions map[pkg.Type]map[candidateKey]candidateAddition, ty pkg.Type, pkgName, vendor string) (vendors []string) { + additions, ok := allAdditions[ty] if !ok { return nil } - return value + if addition, ok := additions[candidateKey{ + Vendor: vendor, + PkgName: pkgName, + }]; ok { + vendors = append(vendors, addition.AdditionalVendors...) + } + + if addition, ok := additions[candidateKey{ + PkgName: pkgName, + }]; ok { + vendors = append(vendors, addition.AdditionalVendors...) + } + + if addition, ok := additions[candidateKey{ + Vendor: vendor, + }]; ok { + vendors = append(vendors, addition.AdditionalVendors...) + } + + return vendors +} + +// findAdditionalProducts searches all possible product additions that could be added during the CPE generation process (given package info) +func findAdditionalProducts(allAdditions map[pkg.Type]map[candidateKey]candidateAddition, ty pkg.Type, pkgName string) (products []string) { + additions, ok := allAdditions[ty] + if !ok { + return nil + } + + if addition, ok := additions[candidateKey{ + PkgName: pkgName, + }]; ok { + products = append(products, addition.AdditionalProducts...) + } + + return products } diff --git a/syft/pkg/cataloger/common/cpe/candidate_by_package_type_test.go b/syft/pkg/cataloger/common/cpe/candidate_by_package_type_test.go new file mode 100644 index 000000000..497d298a6 --- /dev/null +++ b/syft/pkg/cataloger/common/cpe/candidate_by_package_type_test.go @@ -0,0 +1,155 @@ +package cpe + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg" + "github.com/stretchr/testify/assert" +) + +func Test_additionalProducts(t *testing.T) { + tests := []struct { + name string + allAdditions map[pkg.Type]map[candidateKey]candidateAddition + ty pkg.Type + pkgName string + expected []string + }{ + { + name: "product name addition", + allAdditions: map[pkg.Type]map[candidateKey]candidateAddition{ + pkg.JavaPkg: { + candidateKey{ + PkgName: "spring-core", + }: { + AdditionalProducts: []string{"spring_framework", "springsource_spring_framework"}, + }, + }, + }, + ty: pkg.JavaPkg, + pkgName: "spring-core", + expected: []string{"spring_framework", "springsource_spring_framework"}, + }, + { + name: "no addition found", + allAdditions: map[pkg.Type]map[candidateKey]candidateAddition{ + pkg.JavaPkg: { + candidateKey{ + PkgName: "spring-core", + }: { + AdditionalProducts: []string{"spring_framework", "springsource_spring_framework"}, + }, + }, + }, + ty: pkg.JavaPkg, + pkgName: "nothing", + expected: nil, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, findAdditionalProducts(test.allAdditions, test.ty, test.pkgName)) + }) + } +} + +func Test_additionalVendors(t *testing.T) { + tests := []struct { + name string + allAdditions map[pkg.Type]map[candidateKey]candidateAddition + ty pkg.Type + pkgName string + vendor string + expected []string + }{ + { + name: "vendor addition by input vendor", + allAdditions: map[pkg.Type]map[candidateKey]candidateAddition{ + pkg.JavaPkg: { + candidateKey{ + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"awesome-vendor-addition"}, + }, + // note: the below keys should not be matched + candidateKey{ + PkgName: "my-package-name", + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"bad-addition"}, + }, + candidateKey{ + PkgName: "my-package-name", + }: { + AdditionalVendors: []string{"bad-addition"}, + }, + }, + }, + ty: pkg.JavaPkg, + pkgName: "NOT-MY-PACKAGE", + vendor: "my-vendor", + expected: []string{"awesome-vendor-addition"}, + }, + { + name: "vendor addition by input package name", + allAdditions: map[pkg.Type]map[candidateKey]candidateAddition{ + pkg.JavaPkg: { + candidateKey{ + PkgName: "my-package-name", + }: { + AdditionalVendors: []string{"awesome-vendor-addition"}, + }, + // note: the below keys should not be matched + candidateKey{ + PkgName: "my-package-name", + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"bad-addition"}, + }, + candidateKey{ + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"bad-addition"}, + }, + }, + }, + ty: pkg.JavaPkg, + pkgName: "my-package-name", + vendor: "NOT-MY-VENDOR", + expected: []string{"awesome-vendor-addition"}, + }, + { + name: "vendor addition by input package name + vendor", + allAdditions: map[pkg.Type]map[candidateKey]candidateAddition{ + pkg.JavaPkg: { + candidateKey{ + PkgName: "my-package-name", + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"awesome-vendor-addition"}, + }, + // note: the below keys should not be matched + candidateKey{ + PkgName: "my-package-name", + }: { + AdditionalVendors: []string{"one-good-addition"}, + }, + candidateKey{ + Vendor: "my-vendor", + }: { + AdditionalVendors: []string{"another-good-addition"}, + }, + }, + }, + ty: pkg.JavaPkg, + pkgName: "my-package-name", + vendor: "my-vendor", + expected: []string{"awesome-vendor-addition", "one-good-addition", "another-good-addition"}, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, findAdditionalVendors(test.allAdditions, test.ty, test.pkgName, test.vendor)) + }) + } +} diff --git a/syft/pkg/cataloger/common/cpe/field_candidate.go b/syft/pkg/cataloger/common/cpe/field_candidate.go index db3efb93f..c7abbb1e5 100644 --- a/syft/pkg/cataloger/common/cpe/field_candidate.go +++ b/syft/pkg/cataloger/common/cpe/field_candidate.go @@ -1,6 +1,8 @@ package cpe import ( + "strconv" + "github.com/scylladb/go-set/strset" ) @@ -34,7 +36,7 @@ func (s fieldCandidateSet) addValue(values ...string) { for _, value := range values { // default candidate as an allow-all candidate := fieldCandidate{ - value: value, + value: cleanCandidateField(value), } s[candidate] = struct{}{} } @@ -42,6 +44,7 @@ func (s fieldCandidateSet) addValue(values ...string) { func (s fieldCandidateSet) add(candidates ...fieldCandidate) { for _, candidate := range candidates { + candidate.value = cleanCandidateField(candidate.value) s[candidate] = struct{}{} } } @@ -99,3 +102,11 @@ func (s fieldCandidateSet) copy() fieldCandidateSet { return newSet } + +func cleanCandidateField(field string) string { + cleanedValue, err := strconv.Unquote(field) + if err != nil { + return field + } + return cleanedValue +} diff --git a/syft/pkg/cataloger/common/cpe/field_candidate_test.go b/syft/pkg/cataloger/common/cpe/field_candidate_test.go index b091cfa87..f4f70d22d 100644 --- a/syft/pkg/cataloger/common/cpe/field_candidate_test.go +++ b/syft/pkg/cataloger/common/cpe/field_candidate_test.go @@ -141,6 +141,22 @@ func Test_cpeCandidateValues_filter(t *testing.T) { } } +func Test_cpeFieldCandidateSet_addValue(t *testing.T) { + s := newFieldCandidateSet() + // we should clean all values (unquote strings) + s.addValue(`"string!"`) + assert.ElementsMatch(t, []string{"string!"}, s.values()) +} + +func Test_cpeFieldCandidateSet_add(t *testing.T) { + s := newFieldCandidateSet() + // we should clean all values (unquote strings) + s.add(fieldCandidate{ + value: `"string!"`, + }) + assert.ElementsMatch(t, []string{"string!"}, s.values()) +} + func Test_cpeFieldCandidateSet_clear(t *testing.T) { s := newFieldCandidateSet("1", "2") assert.NotEmpty(t, s.values()) diff --git a/syft/pkg/cataloger/common/cpe/generate.go b/syft/pkg/cataloger/common/cpe/generate.go index 4f66e1588..46febb2df 100644 --- a/syft/pkg/cataloger/common/cpe/generate.go +++ b/syft/pkg/cataloger/common/cpe/generate.go @@ -12,32 +12,6 @@ import ( "github.com/facebookincubator/nvdtools/wfn" ) -var productCandidatesByPkgType = candidatesByPackageType{ - pkg.JavaPkg: { - "springframework": []string{"spring_framework", "springsource_spring_framework"}, - "spring-core": []string{"spring_framework", "springsource_spring_framework"}, - }, - pkg.NpmPkg: { - "hapi": []string{"hapi_server_framework"}, - "handlebars.js": []string{"handlebars"}, - "is-my-json-valid": []string{"is_my_json_valid"}, - "mustache": []string{"mustache.js"}, - }, - pkg.GemPkg: { - "Arabic-Prawn": []string{"arabic_prawn"}, - "bio-basespace-sdk": []string{"basespace_ruby_sdk"}, - "cremefraiche": []string{"creme_fraiche"}, - "html-sanitizer": []string{"html_sanitizer"}, - "sentry-raven": []string{"raven-ruby"}, - "RedCloth": []string{"redcloth_library"}, - "VladTheEnterprising": []string{"vladtheenterprising"}, - "yajl-ruby": []string{"yajl-ruby_gem"}, - }, - pkg.PythonPkg: { - "python-rrdtool": []string{"rrdtool"}, - }, -} - func newCPE(product, vendor, version, targetSW string) wfn.Attributes { cpe := *(wfn.NewAttributesWithAny()) cpe.Part = "a" @@ -129,6 +103,11 @@ func candidateVendors(p pkg.Package) []string { // generate sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci]) addAllSubSelections(vendors) + // add more candidates based on the package info for each vendor candidate + for _, vendor := range vendors.uniqueValues() { + vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, p.Type, p.Name, vendor)...) + } + return vendors.uniqueValues() } @@ -158,8 +137,10 @@ func candidateProducts(p pkg.Package) []string { // try swapping hyphens for underscores, vice versa, and removing separators altogether addDelimiterVariations(products) - // prepend any known product names for the given package type and name (note: this is not a replacement) - return append(productCandidatesByPkgType.getCandidates(p.Type, p.Name), products.uniqueValues()...) + // add known candidate additions + products.addValue(findAdditionalProducts(defaultCandidateAdditions, p.Type, p.Name)...) + + return products.uniqueValues() } func addAllSubSelections(fields fieldCandidateSet) { diff --git a/syft/pkg/cataloger/common/cpe/generate_test.go b/syft/pkg/cataloger/common/cpe/generate_test.go index 450e1b133..884c2c244 100644 --- a/syft/pkg/cataloger/common/cpe/generate_test.go +++ b/syft/pkg/cataloger/common/cpe/generate_test.go @@ -552,6 +552,14 @@ func TestCandidateProducts(t *testing.T) { p pkg.Package expected []string }{ + { + name: "apache-cassandra", + p: pkg.Package{ + Name: "apache-cassandra", + Type: pkg.JavaPkg, + }, + expected: []string{"cassandra" /* <-- known good names | default guess --> */, "apache-cassandra", "apache_cassandra"}, + }, { name: "springframework", p: pkg.Package{ @@ -635,6 +643,37 @@ func TestCandidateProducts(t *testing.T) { } } +func TestCandidateVendor(t *testing.T) { + tests := []struct { + name string + p pkg.Package + expected []string + }{ + { + name: "elasticsearch", + p: pkg.Package{ + Name: "elasticsearch", + Type: pkg.JavaPkg, + }, + expected: []string{"elastic" /* <-- known good names | default guess --> */, "elasticsearch"}, + }, + { + name: "log4j", + p: pkg.Package{ + Name: "log4j", + Type: pkg.JavaPkg, + }, + expected: []string{"apache" /* <-- known good names | default guess --> */, "log4j"}, + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%+v %+v", test.p, test.expected), func(t *testing.T) { + assert.ElementsMatch(t, test.expected, candidateVendors(test.p)) + }) + } +} + func Test_generateSubSelections(t *testing.T) { tests := []struct { field string diff --git a/syft/pkg/cataloger/common/cpe/java.go b/syft/pkg/cataloger/common/cpe/java.go index 0c061d7cd..1ca5d8f19 100644 --- a/syft/pkg/cataloger/common/cpe/java.go +++ b/syft/pkg/cataloger/common/cpe/java.go @@ -192,13 +192,13 @@ func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string } if startsWithTopLevelDomain(properties.GroupID) { - groupIDs = append(groupIDs, strings.TrimSpace(properties.GroupID)) + groupIDs = append(groupIDs, cleanGroupID(properties.GroupID)) } // sometimes the publisher puts the group ID in the artifact ID field unintentionally if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 { // there is a strong indication that the artifact ID is really a group ID - groupIDs = append(groupIDs, strings.TrimSpace(properties.ArtifactID)) + groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID)) } return groupIDs @@ -224,13 +224,13 @@ func groupIDsFromPomProject(project *pkg.PomProject) (groupIDs []string) { func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) { if startsWithTopLevelDomain(groupID) { - groupIDs = append(groupIDs, strings.TrimSpace(groupID)) + groupIDs = append(groupIDs, cleanGroupID(groupID)) } // sometimes the publisher puts the group ID in the artifact ID field unintentionally if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 { // there is a strong indication that the artifact ID is really a group ID - groupIDs = append(groupIDs, strings.TrimSpace(artifactID)) + groupIDs = append(groupIDs, cleanGroupID(artifactID)) } return groupIDs } @@ -263,13 +263,13 @@ func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (grou for _, name := range fields { if value, exists := manifest.Main[name]; exists { if startsWithTopLevelDomain(value) { - groupIDs = append(groupIDs, value) + groupIDs = append(groupIDs, cleanGroupID(value)) } } for _, section := range manifest.NamedSections { if value, exists := section[name]; exists { if startsWithTopLevelDomain(value) { - groupIDs = append(groupIDs, value) + groupIDs = append(groupIDs, cleanGroupID(value)) } } } @@ -278,6 +278,17 @@ func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (grou return groupIDs } +func cleanGroupID(groupID string) string { + return strings.TrimSpace(removeOSCIDirectives(groupID)) +} + +func removeOSCIDirectives(groupID string) string { + // for example: + // org.bar;uses:=“org.foo” -> org.bar + // more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/ + return strings.Split(groupID, ";")[0] +} + func startsWithTopLevelDomain(value string) bool { return internal.HasAnyOfPrefixes(value, domains...) } diff --git a/syft/pkg/cataloger/common/cpe/java_test.go b/syft/pkg/cataloger/common/cpe/java_test.go index b7ccd0194..a830f3c1d 100644 --- a/syft/pkg/cataloger/common/cpe/java_test.go +++ b/syft/pkg/cataloger/common/cpe/java_test.go @@ -153,7 +153,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) { pkg: pkg.Package{ Metadata: pkg.JavaMetadata{ PomProperties: &pkg.PomProperties{ - GroupID: "io.jenkins-ci.plugin.thing", + GroupID: "io.jenkins-ci.plugin.thing;version='[2,3)'", }, }, }, @@ -164,7 +164,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) { pkg: pkg.Package{ Metadata: pkg.JavaMetadata{ PomProperties: &pkg.PomProperties{ - ArtifactID: "io.jenkins-ci.plugin.thing", + ArtifactID: "io.jenkins-ci.plugin.thing; version='[2,3)' ; org.something.else", }, }, },