From d9de63c83784073408c388b59a34c5ad8285f9a9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 22 Apr 2021 08:22:56 -0400 Subject: [PATCH] Enhance CPE generation for java GroupId and filtering (#402) * enhance cpe generation for group id and filtering Signed-off-by: Alex Goodman * rename group id const + add doc comment for HasAnyOfPrefixes Signed-off-by: Alex Goodman --- internal/string_helpers.go | 14 ++ internal/string_helpers_test.go | 65 +++++++++ syft/pkg/cataloger/cpe.go | 76 ++++++---- syft/pkg/cataloger/cpe_test.go | 131 +++++++++++++++++- syft/pkg/cataloger/java/archive_parser.go | 4 +- syft/pkg/java_metadata.go | 20 ++- syft/pkg/java_metadata_test.go | 60 +++++++- .../catalog_packages_cases_test.go | 3 +- test/integration/catalog_packages_test.go | 4 +- 9 files changed, 333 insertions(+), 44 deletions(-) create mode 100644 internal/string_helpers.go create mode 100644 internal/string_helpers_test.go diff --git a/internal/string_helpers.go b/internal/string_helpers.go new file mode 100644 index 000000000..a0539e4ed --- /dev/null +++ b/internal/string_helpers.go @@ -0,0 +1,14 @@ +package internal + +import "strings" + +// HasAnyOfPrefixes returns an indication if the given string has any of the given prefixes. +func HasAnyOfPrefixes(input string, prefixes ...string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(input, prefix) { + return true + } + } + + return false +} diff --git a/internal/string_helpers_test.go b/internal/string_helpers_test.go new file mode 100644 index 000000000..d15a2f654 --- /dev/null +++ b/internal/string_helpers_test.go @@ -0,0 +1,65 @@ +package internal + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHasAnyOfPrefixes(t *testing.T) { + tests := []struct { + name string + input string + prefixes []string + expected bool + }{ + { + name: "go case", + input: "this has something", + prefixes: []string{ + "this has", + "that does not have", + }, + expected: true, + }, + { + name: "no match", + input: "this has something", + prefixes: []string{ + "this DOES NOT has", + "that does not have", + }, + expected: false, + }, + { + name: "empty", + input: "this has something", + prefixes: []string{}, + expected: false, + }, + { + name: "positive match last", + input: "this has something", + prefixes: []string{ + "that does not have", + "this has", + }, + expected: true, + }, + { + name: "empty input", + input: "", + prefixes: []string{ + "that does not have", + "this has", + }, + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, HasAnyOfPrefixes(test.input, test.prefixes...)) + }) + } +} diff --git a/syft/pkg/cataloger/cpe.go b/syft/pkg/cataloger/cpe.go index f51e08582..0b7fece04 100644 --- a/syft/pkg/cataloger/cpe.go +++ b/syft/pkg/cataloger/cpe.go @@ -10,12 +10,6 @@ import ( "github.com/facebookincubator/nvdtools/wfn" ) -// this is functionally equivalent to "*" and consistent with no input given (thus easier to test) -const any = "" - -// this is a static mapping of known package names (keys) to official cpe names for each package -type candidateStore map[pkg.Type]map[string][]string - var productCandidatesByPkgType = candidateStore{ pkg.JavaPkg: { "springframework": []string{"spring_framework", "springsource_spring_framework"}, @@ -42,6 +36,25 @@ var productCandidatesByPkgType = candidateStore{ }, } +var cpeFilters = []filterFn{ + // nolint: goconst + func(cpe pkg.CPE, p pkg.Package) bool { + // jira / atlassian should not apply to clients + if cpe.Vendor == "atlassian" && cpe.Product == "jira" && strings.Contains(p.Name, "client") { + return true + } + if cpe.Vendor == "jira" && cpe.Product == "jira" && strings.Contains(p.Name, "client") { + return true + } + return false + }, +} + +type filterFn func(cpe pkg.CPE, p pkg.Package) bool + +// this is a static mapping of known package names (keys) to official cpe names for each package +type candidateStore map[pkg.Type]map[string][]string + func (s candidateStore) getCandidates(t pkg.Type, key string) []string { if _, ok := s[t]; !ok { return nil @@ -65,6 +78,20 @@ func newCPE(product, vendor, version, targetSW string) wfn.Attributes { return cpe } +func filterCpes(cpes []pkg.CPE, p pkg.Package, filters ...filterFn) (result []pkg.CPE) { +cpeLoop: + for _, cpe := range cpes { + for _, fn := range filters { + if fn(cpe, p) { + continue cpeLoop + } + } + // all filter functions passed on filtering this CPE + result = append(result, cpe) + } + return result +} + // generatePackageCPEs Create a list of CPEs, trying to guess the vendor, product tuple and setting TargetSoftware if possible func generatePackageCPEs(p pkg.Package) []pkg.CPE { targetSws := candidateTargetSoftwareAttrs(p) @@ -74,8 +101,8 @@ func generatePackageCPEs(p pkg.Package) []pkg.CPE { keys := internal.NewStringSet() cpes := make([]pkg.CPE, 0) for _, product := range products { - for _, vendor := range append([]string{any}, vendors...) { - for _, targetSw := range append([]string{any}, targetSws...) { + for _, vendor := range append([]string{wfn.Any}, vendors...) { + for _, targetSw := range append([]string{wfn.Any}, targetSws...) { // prevent duplicate entries... key := fmt.Sprintf("%s|%s|%s|%s", product, vendor, p.Version, targetSw) if keys.Contains(key) { @@ -90,6 +117,9 @@ func generatePackageCPEs(p pkg.Package) []pkg.CPE { } } + // filter out any known combinations that don't accurately represent this package + cpes = filterCpes(cpes, p, cpeFilters...) + sort.Sort(ByCPESpecificity(cpes)) return cpes @@ -157,6 +187,11 @@ func candidateProducts(p pkg.Package) []string { func candidateProductsForJava(p pkg.Package) []string { if product, _ := productAndVendorFromPomPropertiesGroupID(p); product != "" { + // ignore group ID info from a jenkins plugin, as using this info may imply that this package + // CPE belongs to the cloudbees org (or similar) which is wrong. + if p.Type == pkg.JenkinsPluginPkg && strings.ToLower(product) == "jenkins" { + return nil + } return []string{product} } @@ -177,7 +212,7 @@ func productAndVendorFromPomPropertiesGroupID(p pkg.Package) (string, string) { return "", "" } - if !hasAnyOfPrefixes(groupID, "com", "org") { + if !internal.HasAnyOfPrefixes(groupID, "com", "org") { return "", "" } @@ -209,26 +244,7 @@ func shouldConsiderGroupID(groupID string) bool { return false } - excludedGroupIDs := []string{ - pkg.PomPropertiesGroupIDJiraPlugins, - pkg.PomPropertiesGroupIDJenkinsPlugins, - } + excludedGroupIDs := append([]string{pkg.JiraPluginPomPropertiesGroupID}, pkg.JenkinsPluginPomPropertiesGroupIDs...) - for _, excludedGroupID := range excludedGroupIDs { - if groupID == excludedGroupID { - return false - } - } - - return true -} - -func hasAnyOfPrefixes(input string, prefixes ...string) bool { - for _, prefix := range prefixes { - if strings.HasPrefix(input, prefix) { - return true - } - } - - return false + return !internal.HasAnyOfPrefixes(groupID, excludedGroupIDs...) } diff --git a/syft/pkg/cataloger/cpe_test.go b/syft/pkg/cataloger/cpe_test.go index 91a6fec67..795b1a6fb 100644 --- a/syft/pkg/cataloger/cpe_test.go +++ b/syft/pkg/cataloger/cpe_test.go @@ -220,7 +220,7 @@ func TestGeneratePackageCPEs(t *testing.T) { }, }, { - name: "jenkins package identified via groupId", + name: "cloudbees jenkins package identified via groupId", p: pkg.Package{ Name: "name", Version: "3.2", @@ -242,6 +242,135 @@ func TestGeneratePackageCPEs(t *testing.T) { "cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", }, }, + { + name: "jenkins.io package identified via groupId prefix", + p: pkg.Package{ + Name: "name", + Version: "3.2", + FoundBy: "some-analyzer", + Language: pkg.Java, + Type: pkg.JenkinsPluginPkg, + Metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "io.jenkins.plugins.name.something", + }, + }, + }, + expected: []string{ + "cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + }, + }, + { + name: "jenkins.io package identified via groupId", + p: pkg.Package{ + Name: "name", + Version: "3.2", + FoundBy: "some-analyzer", + Language: pkg.Java, + Type: pkg.JenkinsPluginPkg, + Metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "io.jenkins.plugins", + }, + }, + }, + expected: []string{ + "cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + }, + }, + { + name: "jenkins-ci.io package identified via groupId", + p: pkg.Package{ + Name: "name", + Version: "3.2", + FoundBy: "some-analyzer", + Language: pkg.Java, + Type: pkg.JenkinsPluginPkg, + Metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "io.jenkins-ci.plugins", + }, + }, + }, + expected: []string{ + "cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + }, + }, + { + name: "jenkins-ci.org package identified via groupId", + p: pkg.Package{ + Name: "name", + Version: "3.2", + FoundBy: "some-analyzer", + Language: pkg.Java, + Type: pkg.JenkinsPluginPkg, + Metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "org.jenkins-ci.plugins", + }, + }, + }, + expected: []string{ + "cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:*:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:jenkins:*:*", + "cpe:2.3:a:name:name:3.2:*:*:*:*:cloudbees_jenkins:*:*", + }, + }, + { + name: "jira-atlassian filtering", + p: pkg.Package{ + Name: "jira_client_core", + Version: "3.2", + FoundBy: "some-analyzer", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, + Metadata: pkg.JavaMetadata{ + PomProperties: &pkg.PomProperties{ + GroupID: "org.atlassian.jira", + ArtifactID: "jira_client_core", + }, + }, + }, + expected: []string{ + "cpe:2.3:a:*:jira:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:jira:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:*:jira:3.2:*:*:*:*:maven:*:*", + "cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:*:jira_client_core:3.2:*:*:*:*:maven:*:*", + "cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:maven:*:*", + "cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:maven:*:*", + "cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:maven:*:*", + "cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:*:*:*", + "cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:java:*:*", + "cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:maven:*:*", + }, + }, } for _, test := range tests { diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 591f0d85f..c07699567 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -106,8 +106,6 @@ func (j *archiveParser) parse() ([]pkg.Package, error) { // lastly, add the parent package to the list (assuming the parent exists) if parentPkg != nil { - // only the parent package gets the type, nested packages may be of a different package type (or not of a package type at all, since they may not be bundled) - parentPkg.Type = j.fileInfo.pkgType() pkgs = append([]pkg.Package{*parentPkg}, pkgs...) } @@ -143,7 +141,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { Name: selectName(manifest, j.fileInfo), Version: selectVersion(manifest, j.fileInfo), Language: pkg.Java, - Type: pkg.JavaPkg, + Type: j.fileInfo.pkgType(), MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ VirtualPath: j.virtualPath, diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index 8969b7c36..bac1bc7a8 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -1,6 +1,19 @@ package pkg -import "github.com/package-url/packageurl-go" +import ( + "github.com/anchore/syft/internal" + "github.com/package-url/packageurl-go" +) + +const JiraPluginPomPropertiesGroupID = "com.atlassian.jira.plugins" + +var JenkinsPluginPomPropertiesGroupIDs = []string{ + "io.jenkins.plugins", + "org.jenkins.plugins", + "org.jenkins-ci.plugins", + "io.jenkins-ci.plugins", + "com.cloudbees.jenkins.plugins", +} // JavaMetadata encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship. type JavaMetadata struct { @@ -22,7 +35,7 @@ type PomProperties struct { // PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties. func (p PomProperties) PkgTypeIndicated() Type { - if p.GroupID == PomPropertiesGroupIDJenkinsPlugins { + if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) { return JenkinsPluginPkg } @@ -52,6 +65,3 @@ func (m JavaMetadata) PackageURL() string { return "" } - -const PomPropertiesGroupIDJenkinsPlugins = "com.cloudbees.jenkins.plugins" -const PomPropertiesGroupIDJiraPlugins = "com.atlassian.jira.plugins" diff --git a/syft/pkg/java_metadata_test.go b/syft/pkg/java_metadata_test.go index 866686f79..17781d254 100644 --- a/syft/pkg/java_metadata_test.go +++ b/syft/pkg/java_metadata_test.go @@ -1,9 +1,10 @@ package pkg import ( + "testing" + "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" - "testing" ) func TestPomProperties_PkgTypeIndicated(t *testing.T) { @@ -24,7 +25,7 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) { expectedType: JavaPkg, }, { - name: "jenkins plugin", + name: "cloudbees jenkins plugin", pomProperties: PomProperties{ Path: "some path", Name: "some name", @@ -34,6 +35,61 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) { }, expectedType: JenkinsPluginPkg, }, + { + name: "jenkins.io plugin", + pomProperties: PomProperties{ + Path: "some path", + Name: "some name", + GroupID: "io.jenkins.plugins", + ArtifactID: "some artifact ID", + Version: "1", + }, + expectedType: JenkinsPluginPkg, + }, + { + name: "jenkins-ci.io plugin", + pomProperties: PomProperties{ + Path: "some path", + Name: "some name", + GroupID: "io.jenkins-ci.plugins", + ArtifactID: "some artifact ID", + Version: "1", + }, + expectedType: JenkinsPluginPkg, + }, + { + name: "jenkins-ci.org plugin", + pomProperties: PomProperties{ + Path: "some path", + Name: "some name", + GroupID: "org.jenkins-ci.plugins", + ArtifactID: "some artifact ID", + Version: "1", + }, + expectedType: JenkinsPluginPkg, + }, + { + name: "jenkins.org plugin", + pomProperties: PomProperties{ + Path: "some path", + Name: "some name", + GroupID: "org.jenkins.plugins", + ArtifactID: "some artifact ID", + Version: "1", + }, + expectedType: JenkinsPluginPkg, + }, + { + name: "jenkins plugin prefix", + pomProperties: PomProperties{ + Path: "some path", + Name: "some name", + GroupID: "com.cloudbees.jenkins.plugins.bluesteel", + ArtifactID: "some artifact ID", + Version: "1", + }, + expectedType: JenkinsPluginPkg, + }, } for _, tc := range cases { diff --git a/test/integration/catalog_packages_cases_test.go b/test/integration/catalog_packages_cases_test.go index b5c05e892..7eb6358fc 100644 --- a/test/integration/catalog_packages_cases_test.go +++ b/test/integration/catalog_packages_cases_test.go @@ -6,6 +6,7 @@ type testCase struct { name string pkgType pkg.Type pkgLanguage pkg.Language + duplicates int pkgInfo map[string]string } @@ -155,7 +156,6 @@ var commonTestCases = []testCase{ pkgLanguage: pkg.Java, pkgInfo: map[string]string{ "example-java-app-maven": "0.1.0", - "example-jenkins-plugin": "1.0-SNAPSHOT", // the jenkins HPI file has a nested JAR of the same name "joda-time": "2.9.2", }, }, @@ -163,6 +163,7 @@ var commonTestCases = []testCase{ name: "find jenkins plugins", pkgType: pkg.JenkinsPluginPkg, pkgLanguage: pkg.Java, + duplicates: 1, // there is a "example-jenkins-plugin" HPI, and nested within that a JAR of the same name pkgInfo: map[string]string{ "example-jenkins-plugin": "1.0-SNAPSHOT", }, diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 0a7299e18..53d6022ea 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -96,7 +96,7 @@ func TestPkgCoverageImage(t *testing.T) { pkgCount++ } - if pkgCount != len(c.pkgInfo) { + if pkgCount != len(c.pkgInfo)+c.duplicates { t.Logf("Discovered packages of type %+v", c.pkgType) for a := range catalog.Enumerate(c.pkgType) { t.Log(" ", a) @@ -175,7 +175,7 @@ func TestPkgCoverageDirectory(t *testing.T) { actualPkgCount++ } - if actualPkgCount != len(test.pkgInfo) { + if actualPkgCount != len(test.pkgInfo)+test.duplicates { for actualPkg := range catalog.Enumerate(test.pkgType) { t.Log(" ", actualPkg) }