Modify CPE vendor candidate generation approach (#484)

* consider additional vendor candidates for ruby, python, rpm, npm, and java

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add java pom.xml processing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* allow for downstream transform control in cpe generation processing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* migrate CPE generation logic to dedicated package

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* split java manifest groupID extraction into two tiers

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* extract groupID from pom parent project during CPE generation

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update java groupID processing tests to cover multi-tier approach

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix constructor names for cpe.fieldCandidate

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* rename helper function to startsWithTopLevelDomain

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add nil changes for java manifest sections

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update comment to reflect parsing maven files

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* split out java description parsing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* split out pom parent processing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* simplify vendorsFromGroupIDs and associated tests

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* simplify test type for vendorsFromGroupIDs

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* copy candidate variations to new instances

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* rename CPE generation string util functions

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add an explanation around fieldCandidate

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* simplify type for the cpe.fieldCandidateSet

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* make CPE filter function names more readable

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update groupIDsFromJavaManifest to use a guard clause

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* extract groupID extraction from artifactID fields into a separate function

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* bump goreleaser version to combat failure

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-09-03 14:21:25 -04:00 committed by GitHub
parent 0799fd9d46
commit abbba3fc19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 3208 additions and 1763 deletions

View File

@ -103,7 +103,7 @@ bootstrap-tools: $(TEMPDIR)
GO111MODULE=off GOBIN=$(shell realpath $(TEMPDIR)) go get -u golang.org/x/perf/cmd/benchstat
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(TEMPDIR)/ v1.26.0
curl -sSfL https://raw.githubusercontent.com/wagoodman/go-bouncer/master/bouncer.sh | sh -s -- -b $(TEMPDIR)/ v0.2.0
curl -sfL https://install.goreleaser.com/github.com/goreleaser/goreleaser.sh | sh -s -- -b $(TEMPDIR)/ v0.160.0
curl -sfL https://install.goreleaser.com/github.com/goreleaser/goreleaser.sh | sh -s -- -b $(TEMPDIR)/ v0.177.0
.PHONY: bootstrap-go
bootstrap-go:

2
go.mod
View File

@ -36,6 +36,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.7.0
github.com/stretchr/testify v1.7.0
github.com/vifraa/gopom v0.1.0
github.com/wagoodman/go-partybus v0.0.0-20210627031916-db1f5573bbc5
github.com/wagoodman/go-progress v0.0.0-20200731105512-1020f39e6240
github.com/wagoodman/jotframe v0.0.0-20200730190914-3517092dd163
@ -43,6 +44,7 @@ require (
github.com/xeipuuv/gojsonschema v1.2.0
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
golang.org/x/mod v0.3.0
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d
gopkg.in/yaml.v2 v2.3.0
)

14
go.sum
View File

@ -711,6 +711,7 @@ github.com/stretchr/testify v1.3.1-0.20190311161405-34c6fa2dc709/go.mod h1:M5WIy
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
@ -743,6 +744,8 @@ github.com/valyala/fasthttp v1.2.0/go.mod h1:4vX61m6KN+xDduDNwXrhIAVZaZaZiQ1luJk
github.com/valyala/quicktemplate v1.2.0/go.mod h1:EH+4AkTd43SvgIbQHYu59/cJyxDoOVRUAfrukLPuGJ4=
github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio=
github.com/vdemeester/k8s-pkg-credentialprovider v1.17.4/go.mod h1:inCTmtUdr5KJbreVojo06krnTgaeAz/Z7lynpPk/Q2c=
github.com/vifraa/gopom v0.1.0 h1:v897eVxf6lflkEXzPmKbo4YhX2oS/LGjz7cqjWnSmCU=
github.com/vifraa/gopom v0.1.0/go.mod h1:oPa1dcrGrtlO37WPDBm5SqHAT+wTgF8An1Q71Z6Vv4o=
github.com/vmware/govmomi v0.20.3/go.mod h1:URlwyTFZX72RmxtxuaFL2Uj3fD1JTvZdx59bHWk6aFU=
github.com/wagoodman/go-partybus v0.0.0-20200526224238-eb215533f07d/go.mod h1:JPirS5jde/CF5qIjcK4WX+eQmKXdPc6vcZkJ/P0hfPw=
github.com/wagoodman/go-partybus v0.0.0-20210627031916-db1f5573bbc5 h1:phTLPgMRDYTizrBSKsNSOa2zthoC2KsJsaY/8sg3rD8=
@ -869,8 +872,9 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM=
golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d h1:LO7XpTYMwTqxjLcGWPijK3vRXg1aWdlNOVOHRq45d7c=
golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -935,14 +939,18 @@ golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980 h1:OjiUf46hAmXblsZdnoSXsEUSKU8r1UEzcL5RVZ4gO9Y=
golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View File

@ -6,6 +6,7 @@ import (
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
"github.com/anchore/syft/syft/source"
"github.com/hashicorp/go-multierror"
"github.com/wagoodman/go-partybus"
@ -59,7 +60,7 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers
for _, p := range packages {
// generate CPEs
p.CPEs = generatePackageCPEs(p)
p.CPEs = cpe.Generate(p)
// generate PURL
p.PURL = generatePackageURL(p, theDistro)

View File

@ -0,0 +1,18 @@
package cpe
import "github.com/anchore/syft/syft/pkg"
// candidatesByPackageType is a static mapping, grouped by package type, of known package names (keys) to the
// official CPE names for each package.
type candidatesByPackageType map[pkg.Type]map[string][]string

// getCandidates returns the candidates registered for the given package type and key. The result is nil when
// either the package type or the key has no entry.
func (s candidatesByPackageType) getCandidates(t pkg.Type, key string) []string {
	// indexing a missing (nil) inner map is safe and yields the zero value, which for a slice is nil
	return s[t][key]
}

View File

@ -0,0 +1,83 @@
package cpe
import (
"github.com/scylladb/go-set/strset"
)
// fieldCandidate represents a single "guess" for a specific field in a future CPE (vendor, product, target SW, etc).
// When generating these candidates depending on the field the value was sourced from there may be only a subset of
// transforms that should be applied (downstream of extraction). Expressing candidates in this struct allows for this
// flexibility such that downstream transforms can be elected into or skipped over.
//
// The zero value of both flags means "allow every transform". The struct contains only comparable fields, which is
// what allows it to be used directly as a map key by fieldCandidateSet.
type fieldCandidate struct {
// value is the raw text of the guess.
value string
// disallowSubSelections is the flag reported by filterOutBySubselection; when set, the candidate is excluded
// from sub-selection generation.
disallowSubSelections bool
// disallowDelimiterVariations is the flag reported by filterOutByDelimiterVariations; when set, the candidate is
// excluded from hyphen/underscore variation generation.
disallowDelimiterVariations bool
}
// fieldCandidateSet is a set of candidates keyed by the whole fieldCandidate struct, so two candidates with the
// same value but different flags are stored as separate entries.
type fieldCandidateSet map[fieldCandidate]struct{}
// newFieldCandidateFromSets returns a new set that holds every candidate found in any of the given sets. The
// given sets are only read, never modified.
func newFieldCandidateFromSets(sets ...fieldCandidateSet) fieldCandidateSet {
	merged := newFieldCandidateSet()
	merged.union(sets...)
	return merged
}
// newFieldCandidateSet returns a new (never nil) set seeded with one allow-all candidate per given value.
func newFieldCandidateSet(values ...string) fieldCandidateSet {
	set := fieldCandidateSet{}
	set.addValue(values...)
	return set
}
// addValue inserts one candidate per given value. Only the value is populated, so both disallow flags keep
// their zero value and the candidate remains eligible for every downstream transform.
func (s fieldCandidateSet) addValue(values ...string) {
	for i := range values {
		s[fieldCandidate{value: values[i]}] = struct{}{}
	}
}
// add inserts the given candidates as-is, keeping whatever flags each one carries.
func (s fieldCandidateSet) add(candidates ...fieldCandidate) {
	for i := range candidates {
		s[candidates[i]] = struct{}{}
	}
}
// clear removes every candidate from the set in place, so all holders of this set observe it as empty.
func (s fieldCandidateSet) clear() {
	for candidate := range s {
		delete(s, candidate)
	}
}
// union adds every candidate from each of the given sets into this set. The given sets are not modified.
func (s fieldCandidateSet) union(others ...fieldCandidateSet) {
	for _, other := range others {
		for candidate := range other {
			s[candidate] = struct{}{}
		}
	}
}
// list returns the candidates in the set (in no particular order), leaving out every candidate for which at
// least one of the given filters returns true. Filters are evaluated in order and evaluation stops at the first
// filter that rejects the candidate. The result is nil when nothing is kept.
func (s fieldCandidateSet) list(filters ...filterFieldCandidateFn) (results []fieldCandidate) {
	for candidate := range s {
		rejected := false
		for _, shouldRemove := range filters {
			if shouldRemove(candidate) {
				rejected = true
				break
			}
		}
		if !rejected {
			results = append(results, candidate)
		}
	}
	return results
}
// values returns the value of each candidate that list keeps for the given filters. Values may repeat when the
// set holds the same value with different flags. The result is nil when no candidate is kept.
func (s fieldCandidateSet) values(filters ...filterFieldCandidateFn) (results []string) {
	candidates := s.list(filters...)
	for i := range candidates {
		results = append(results, candidates[i].value)
	}
	return results
}
// uniqueValues returns the same values as values for the given filters, with duplicates collapsed by passing
// them through a string set.
func (s fieldCandidateSet) uniqueValues(filters ...filterFieldCandidateFn) []string {
	deduplicated := strset.New(s.values(filters...)...)
	return deduplicated.List()
}

View File

@ -0,0 +1,12 @@
package cpe
// filterFieldCandidateFn instances should return true if the given fieldCandidate should be removed from a collection
type filterFieldCandidateFn func(fieldCandidate) bool
// filterOutBySubselection returns true (remove) for candidates that have disallowSubSelections set.
func filterOutBySubselection(c fieldCandidate) bool {
return c.disallowSubSelections
}
// filterOutByDelimiterVariations returns true (remove) for candidates that have disallowDelimiterVariations set.
func filterOutByDelimiterVariations(c fieldCandidate) bool {
return c.disallowDelimiterVariations
}

View File

@ -0,0 +1,263 @@
package cpe
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Test_cpeCandidateValues_filter verifies that values() returns only the candidates that none of the given
// filters reject.
func Test_cpeCandidateValues_filter(t *testing.T) {
	// every case runs against the same four candidates, covering each combination of the two disallow flags
	allCombinations := []fieldCandidate{
		{value: "allow anything"},
		{value: "no-sub-selections", disallowSubSelections: true},
		{value: "no-delimiter-variations", disallowDelimiterVariations: true},
		{value: "allow nothing", disallowSubSelections: true, disallowDelimiterVariations: true},
	}

	tests := []struct {
		name    string
		input   []fieldCandidate
		filters []filterFieldCandidateFn
		expect  []string
	}{
		{
			name:  "gocase",
			input: allCombinations,
			expect: []string{
				"allow anything",
				"no-sub-selections",
				"no-delimiter-variations",
				"allow nothing",
			},
		},
		{
			name:    "filter out sub-selections",
			input:   allCombinations,
			filters: []filterFieldCandidateFn{filterOutBySubselection},
			expect: []string{
				"allow anything",
				"no-delimiter-variations",
			},
		},
		{
			name:    "filter out delimiter-variations",
			input:   allCombinations,
			filters: []filterFieldCandidateFn{filterOutByDelimiterVariations},
			expect: []string{
				"allow anything",
				"no-sub-selections",
			},
		},
		{
			name:  "all filters",
			input: allCombinations,
			filters: []filterFieldCandidateFn{
				filterOutByDelimiterVariations,
				filterOutBySubselection,
			},
			expect: []string{
				"allow anything",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			set := newFieldCandidateSet()
			set.add(test.input...)
			assert.ElementsMatch(t, test.expect, set.values(test.filters...))
		})
	}
}
// Test_cpeFieldCandidateSet_clear verifies that clear() empties a populated set.
func Test_cpeFieldCandidateSet_clear(t *testing.T) {
	set := newFieldCandidateSet("1", "2")
	assert.NotEmpty(t, set.values())

	set.clear()

	assert.Empty(t, set.values())
}
// Test_cpeFieldCandidateSet_union verifies that union() merges by whole-candidate identity: the same value with
// different flags is kept as a separate entry.
func Test_cpeFieldCandidateSet_union(t *testing.T) {
	s1 := newFieldCandidateSet("1", "2")
	assert.Len(t, s1.list(), 2)

	s2 := newFieldCandidateSet("2", "3", "4")
	assert.Len(t, s2.list(), 3)

	flagged := []fieldCandidate{
		{value: "1", disallowSubSelections: true, disallowDelimiterVariations: false},
		{value: "4", disallowSubSelections: false, disallowDelimiterVariations: true},
		{value: "5", disallowSubSelections: true, disallowDelimiterVariations: true},
	}
	s3 := newFieldCandidateSet()
	s3.add(flagged...)
	assert.Len(t, s3.list(), 3)

	s1.union(s2, s3)

	// 1 & 4 have duplicate entries since there are candidate conditions set
	assert.ElementsMatch(t, s1.values(), []string{"1", "1", "2", "3", "4", "4", "5"})

	unflagged := []fieldCandidate{
		{value: "1"},
		{value: "2"},
		{value: "3"},
		{value: "4"},
	}
	assert.ElementsMatch(t, s1.list(), append(unflagged, flagged...))
}
// Test_cpeFieldCandidateSet_union_byValue verifies that union() collapses overlapping entries when every
// candidate is a plain (unflagged) value.
func Test_cpeFieldCandidateSet_union_byValue(t *testing.T) {
	s1 := newFieldCandidateSet("1", "2")
	assert.Len(t, s1.list(), 2)

	s2 := newFieldCandidateSet("2", "3", "4")
	assert.Len(t, s2.list(), 3)

	s3 := newFieldCandidateSet("1", "4", "5")
	assert.Len(t, s3.list(), 3)

	s1.union(s2, s3)

	assert.ElementsMatch(t, s1.values(), []string{"1", "2", "3", "4", "5"})
	assert.ElementsMatch(t, s1.list(), []fieldCandidate{
		{value: "1"},
		{value: "2"},
		{value: "3"},
		{value: "4"},
		{value: "5"},
	})
}
// Test_cpeFieldCandidateSet_uniqueValues verifies that uniqueValues() reports each value once, even when the set
// holds the same value under several flag combinations.
func Test_cpeFieldCandidateSet_uniqueValues(t *testing.T) {
	candidates := []fieldCandidate{
		{value: "1"},
		{value: "1", disallowSubSelections: true},
		{value: "2", disallowDelimiterVariations: true},
		{value: "2"},
		{value: "3", disallowSubSelections: true, disallowDelimiterVariations: true},
	}

	set := newFieldCandidateSet()
	set.add(candidates...)

	assert.ElementsMatch(t, []string{"1", "2", "3"}, set.uniqueValues())
}

View File

@ -1,4 +1,4 @@
package cataloger
package cpe
import (
"strings"
@ -9,16 +9,31 @@ import (
const jenkinsName = "jenkins"
// filterFn instances should return true if the given CPE should be removed from a collection for the given package
type filterFn func(cpe pkg.CPE, p pkg.Package) bool
var cpeFilters = []filterFn{
jiraClientPackageFilter,
jenkinsPackageNameFilter,
jenkinsPluginFilter,
disallowJiraClientServerMismatch,
disallowJenkinsServerCPEForPluginPackage,
disallowJenkinsCPEsNotAssociatedWithJenkins,
}
// filter returns the given CPEs, in their original order, without those that at least one of the filter
// functions reports should be removed for the given package. Filters are evaluated in order and evaluation
// stops at the first one that rejects a CPE. The result is nil when no CPE is kept.
func filter(cpes []pkg.CPE, p pkg.Package, filters ...filterFn) (result []pkg.CPE) {
	for _, candidate := range cpes {
		keep := true
		for _, shouldRemove := range filters {
			if shouldRemove(candidate, p) {
				keep = false
				break
			}
		}
		if keep {
			result = append(result, candidate)
		}
	}
	return result
}
// jenkins plugins should not match against jenkins
func jenkinsPluginFilter(cpe pkg.CPE, p pkg.Package) bool {
func disallowJenkinsServerCPEForPluginPackage(cpe pkg.CPE, p pkg.Package) bool {
if p.Type == pkg.JenkinsPluginPkg && cpe.Product == jenkinsName {
return true
}
@ -26,7 +41,7 @@ func jenkinsPluginFilter(cpe pkg.CPE, p pkg.Package) bool {
}
// filter to account for packages that are not for jenkins but have a generated CPE that will match against jenkins
func jenkinsPackageNameFilter(cpe pkg.CPE, p pkg.Package) bool {
func disallowJenkinsCPEsNotAssociatedWithJenkins(cpe pkg.CPE, p pkg.Package) bool {
// jenkins server should only match against a product with the name jenkins
if cpe.Product == jenkinsName && !strings.Contains(strings.ToLower(p.Name), jenkinsName) {
if cpe.Vendor == wfn.Any || cpe.Vendor == jenkinsName || cpe.Vendor == "cloudbees" {
@ -37,7 +52,7 @@ func jenkinsPackageNameFilter(cpe pkg.CPE, p pkg.Package) bool {
}
// filter to account for packages which are jira client packages but have a CPE that will match against jira
func jiraClientPackageFilter(cpe pkg.CPE, p pkg.Package) bool {
func disallowJiraClientServerMismatch(cpe pkg.CPE, p pkg.Package) bool {
// jira / atlassian should not apply to clients
if cpe.Product == "jira" && strings.Contains(strings.ToLower(p.Name), "client") {
if cpe.Vendor == wfn.Any || cpe.Vendor == "jira" || cpe.Vendor == "atlassian" {

View File

@ -1,4 +1,4 @@
package cataloger
package cpe
import (
"testing"
@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/assert"
)
func Test_jenkinsPluginFilter(t *testing.T) {
func Test_disallowJenkinsServerCPEForPluginPackage(t *testing.T) {
tests := []struct {
name string
cpe pkg.CPE
@ -41,12 +41,12 @@ func Test_jenkinsPluginFilter(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, jenkinsPluginFilter(test.cpe, test.pkg))
assert.Equal(t, test.expected, disallowJenkinsServerCPEForPluginPackage(test.cpe, test.pkg))
})
}
}
func Test_jenkinsPackageNameFilter(t *testing.T) {
func Test_disallowJenkinsCPEsNotAssociatedWithJenkins(t *testing.T) {
tests := []struct {
name string
cpe pkg.CPE
@ -101,12 +101,12 @@ func Test_jenkinsPackageNameFilter(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, jenkinsPackageNameFilter(test.cpe, test.pkg))
assert.Equal(t, test.expected, disallowJenkinsCPEsNotAssociatedWithJenkins(test.cpe, test.pkg))
})
}
}
func Test_jiraClientPackageFilter(t *testing.T) {
func Test_disallowJiraClientServerMismatch(t *testing.T) {
tests := []struct {
name string
cpe pkg.CPE
@ -161,7 +161,7 @@ func Test_jiraClientPackageFilter(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, jiraClientPackageFilter(test.cpe, test.pkg))
assert.Equal(t, test.expected, disallowJiraClientServerMismatch(test.cpe, test.pkg))
})
}
}

View File

@ -0,0 +1,247 @@
package cpe
import (
"bufio"
"bytes"
"fmt"
"sort"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/facebookincubator/nvdtools/wfn"
)
// productCandidatesByPkgType holds, per package type, the product names that should additionally be used as
// CPE product candidates for specific package names. Lookups use the package name exactly as written here.
var productCandidatesByPkgType = candidatesByPackageType{
	pkg.JavaPkg: {
		"springframework": {"spring_framework", "springsource_spring_framework"},
		"spring-core":     {"spring_framework", "springsource_spring_framework"},
	},
	pkg.NpmPkg: {
		"hapi":             {"hapi_server_framework"},
		"handlebars.js":    {"handlebars"},
		"is-my-json-valid": {"is_my_json_valid"},
		"mustache":         {"mustache.js"},
	},
	pkg.GemPkg: {
		"Arabic-Prawn":        {"arabic_prawn"},
		"bio-basespace-sdk":   {"basespace_ruby_sdk"},
		"cremefraiche":        {"creme_fraiche"},
		"html-sanitizer":      {"html_sanitizer"},
		"sentry-raven":        {"raven-ruby"},
		"RedCloth":            {"redcloth_library"},
		"VladTheEnterprising": {"vladtheenterprising"},
		"yajl-ruby":           {"yajl-ruby_gem"},
	},
	pkg.PythonPkg: {
		"python-rrdtool": {"rrdtool"},
	},
}
// newCPE returns application ("a" part) CPE attributes with the given product, vendor, version, and target
// software. Every other attribute keeps the value assigned by wfn.NewAttributesWithAny.
func newCPE(product, vendor, version, targetSW string) wfn.Attributes {
	attributes := wfn.NewAttributesWithAny()
	attributes.Part = "a"
	attributes.Product = product
	attributes.Vendor = vendor
	attributes.Version = version
	attributes.TargetSW = targetSW
	return *attributes
}
// Generate creates a list of CPEs for the given package by guessing vendor and product values and pairing every
// product candidate with every vendor candidate. The aim is the minimal set of representative CPEs, which
// implies that optional fields (such as target SW) are not filled in. Known-bad combinations are filtered out
// and the remainder is sorted by specificity. Nil is returned when there are no product candidates.
func Generate(p pkg.Package) []pkg.CPE {
	vendors := candidateVendors(p)
	products := candidateProducts(p)
	if len(products) == 0 {
		return nil
	}

	var cpes []pkg.CPE
	seen := internal.NewStringSet()
	for _, product := range products {
		for _, vendor := range vendors {
			// candidate lists may repeat values, so only emit each product/vendor/version combination once
			key := fmt.Sprintf("%s|%s|%s", product, vendor, p.Version)
			if !seen.Contains(key) {
				seen.Add(key)
				cpes = append(cpes, newCPE(product, vendor, p.Version, wfn.Any))
			}
		}
	}

	// filter out any known combinations that don't accurately represent this package
	cpes = filter(cpes, p, cpeFilters...)

	sort.Sort(BySpecificity(cpes))

	return cpes
}
// candidateVendors returns the unique vendor guesses for the given package: the product candidates plus
// language- and metadata-specific additions, expanded with delimiter variations and sub-selections.
func candidateVendors(p pkg.Package) []string {
	// in ecosystems where the packaging metadata does not have a clear field to indicate a vendor (or a field that
	// could be interpreted indirectly as such) the project name tends to be a common stand in. Examples of this
	// are the elasticsearch gem, xstream jar, and rack gem... all of these cases you can find vulnerabilities
	// with CPEs where the vendor is the product name and doesn't appear to be derived from any available package
	// metadata.
	vendors := newFieldCandidateSet(candidateProducts(p)...)

	// some ecosystems do not have enough metadata to determine the vendor accurately, in which case we selectively
	// allow * as a candidate. Note: do NOT allow Java packages to have * vendors.
	switch p.Language {
	case pkg.Ruby:
		vendors.addValue("ruby-lang", "*")
	case pkg.JavaScript:
		vendors.addValue("*")
	case pkg.Go:
		// replace all candidates with only the golang-specific helper
		vendors.clear()
		if vendor := candidateVendorForGo(p.Name); vendor != "" {
			vendors.addValue(vendor)
		}
	}

	// fold in whatever the package metadata can tell us about the vendor
	switch p.MetadataType {
	case pkg.RpmdbMetadataType:
		vendors.union(candidateVendorsForRPM(p))
	case pkg.GemMetadataType:
		vendors.union(candidateVendorsForRuby(p))
	case pkg.PythonPackageMetadataType:
		vendors.union(candidateVendorsForPython(p))
	case pkg.JavaMetadataType:
		vendors.union(candidateVendorsForJava(p))
	}

	// try swapping hyphens for underscores and vice versa
	addDelimiterVariations(vendors)

	// generate sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci])
	addAllSubSelections(vendors)

	return vendors.uniqueValues()
}
// candidateProducts returns the product guesses for the given package: any statically known product names for
// the package type and name first, followed by the unique candidates derived from the package name (with
// hyphen/underscore variations). The returned slice is always freshly allocated, so callers may modify it
// without affecting the static productCandidatesByPkgType table. Nil is returned when there are no candidates.
func candidateProducts(p pkg.Package) []string {
	products := newFieldCandidateSet(p.Name)

	switch {
	case p.Language == pkg.Python:
		if !strings.HasPrefix(p.Name, "python") {
			products.addValue("python-" + p.Name)
		}
	case p.Language == pkg.Java || p.MetadataType == pkg.JavaMetadataType:
		products.addValue(candidateProductsForJava(p)...)
	case p.Language == pkg.Go:
		// replace all candidates with only the golang-specific helper
		products.clear()
		if product := candidateProductForGo(p.Name); product != "" {
			products.addValue(product)
		}
	}

	// try swapping hyphens for underscores and vice versa
	addDelimiterVariations(products)

	known := productCandidatesByPkgType.getCandidates(p.Type, p.Name)
	generated := products.uniqueValues()
	if len(known) == 0 && len(generated) == 0 {
		return nil
	}

	// prepend any known product names for the given package type and name (note: this is not a replacement).
	// The known names are copied rather than appended to directly: appending nothing to the static slice would
	// hand the package-level table entry itself back to the caller.
	combined := make([]string, 0, len(known)+len(generated))
	combined = append(combined, known...)
	return append(combined, generated...)
}
// addAllSubSelections adds, as allow-all candidates, the sub-selections of every candidate value in the set
// that permits sub-selections. The eligible values are collected before anything is added, so values added
// here are not expanded again.
func addAllSubSelections(set fieldCandidateSet) {
	eligible := set.values(filterOutBySubselection)
	for i := range eligible {
		set.addValue(generateSubSelections(eligible[i])...)
	}
}
// generateSubSelections splits a field on hyphens and underscores and returns the cumulative prefixes ending on
// each segment, shortest first, for use as product or vendor candidates.
// E.g. jenkins-ci-tools -> [jenkins, jenkins-ci, jenkins-ci-tools].
func generateSubSelections(field string) (results []string) {
	scanner := bufio.NewScanner(strings.NewReader(field))
	scanner.Split(scanByHyphenOrUnderscore)

	// the last byte of the previously scanned token, used to join the next segment onto the previous result
	var separator byte
	for scanner.Scan() {
		token := scanner.Text()
		if token == "" {
			break
		}

		// scanByHyphenOrUnderscore keeps the delimiter that ended the token, so strip any hyphen or underscore
		// from both ends to get at the segment itself
		segment := strings.TrimFunc(token, trimHyphenOrUnderscore)

		switch {
		case segment == "":
			// a delimiter-only token contributes no result
		case len(results) == 0:
			results = append(results, segment)
		default:
			results = append(results, results[len(results)-1]+string(separator)+segment)
		}

		// keep track of the trailing separator for the next loop
		separator = token[len(token)-1]
	}
	return results
}
// trimHyphenOrUnderscore reports whether the rune is a hyphen or an underscore. It is meant to be passed to
// strings.TrimFunc in order to strip those characters.
func trimHyphenOrUnderscore(r rune) bool {
	return r == '-' || r == '_'
}
// scanByHyphenOrUnderscore is a bufio.SplitFunc that ends a token at the first hyphen or underscore and keeps
// that separator as the last byte of the token. With no separator in data, the remaining data is returned as the
// final token at EOF; otherwise more data is requested.
func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byte, err error) {
	separatorIdx := bytes.IndexAny(data, "-_")
	switch {
	case atEOF && len(data) == 0:
		// nothing left to tokenize
		return 0, nil, nil
	case separatorIdx >= 0:
		end := separatorIdx + 1
		return end, data[:end], nil
	case atEOF:
		return len(data), data, nil
	default:
		// no separator yet: ask the scanner for more data
		return 0, nil, nil
	}
}
// addDelimiterVariations adds, for every candidate in the set that permits delimiter variations, a copy whose
// hyphens are all replaced by underscores (when it has hyphens) and a copy whose underscores are all replaced by
// hyphens (when it has underscores). Each copy keeps the flags of the candidate it was derived from.
func addDelimiterVariations(fields fieldCandidateSet) {
	for _, original := range fields.list(filterOutByDelimiterVariations) {
		if strings.Contains(original.value, "-") {
			// provide variations of hyphen candidates with an underscore
			variant := original
			variant.value = strings.ReplaceAll(original.value, "-", "_")
			fields.add(variant)
		}

		if strings.Contains(original.value, "_") {
			// provide variations of underscore candidates with a hyphen
			variant := original
			variant.value = strings.ReplaceAll(original.value, "_", "-")
			fields.add(variant)
		}
	}
}

View File

@ -0,0 +1,658 @@
package cpe
import (
"fmt"
"sort"
"strings"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/scylladb/go-set"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
)
// TestGeneratePackageCPEs runs Generate against a table of packages and compares the resulting set of CPE
// strings with the expected set. Any difference is reported per subtest, split into CPEs that were generated
// but not expected ("extra") and CPEs that were expected but not generated ("missing").
func TestGeneratePackageCPEs(t *testing.T) {
	tests := []struct {
		name     string
		p        pkg.Package
		expected []string
	}{
		{
			name: "hyphen replacement",
			p: pkg.Package{
				Name:     "name-part",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Python,
				Type:     pkg.DebPkg,
			},
			expected: []string{
				"cpe:2.3:a:name-part:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name-part:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name-part:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name-part:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name_part:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name_part:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name_part:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name_part:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name-part:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name-part:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name-part:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name-part:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:python_name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name_part:name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name_part:name_part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name_part:python-name-part:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name_part:python_name_part:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "python language",
			p: pkg.Package{
				Name:         "name",
				Version:      "3.2",
				FoundBy:      "some-analyzer",
				Language:     pkg.Python,
				Type:         pkg.DebPkg,
				MetadataType: pkg.PythonPackageMetadataType,
				Metadata: pkg.PythonPackageMetadata{
					Author:      "alex goodman",
					AuthorEmail: "william.goodman@anchore.com",
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python-name:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:python_name:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:alex_goodman:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:alex_goodman:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:alex_goodman:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william-goodman:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william-goodman:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william-goodman:python_name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william_goodman:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william_goodman:python-name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:william_goodman:python_name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "javascript language",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.JavaScript,
				Type:     pkg.DebPkg,
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "ruby language",
			p: pkg.Package{
				Name:         "name",
				Version:      "3.2",
				FoundBy:      "some-analyzer",
				Language:     pkg.Ruby,
				Type:         pkg.DebPkg,
				MetadataType: pkg.GemMetadataType,
				Metadata: pkg.GemMetadata{
					Authors: []string{
						"someones name",
						"someones.elses.name@gmail.com",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:*:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:ruby-lang:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:ruby:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:ruby_lang:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:someones-elses-name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:someones-name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:someones_elses_name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:someones_name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "java language",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JavaPkg,
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "java language with groupID",
			p: pkg.Package{
				Name:         "name",
				Version:      "3.2",
				FoundBy:      "some-analyzer",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "org.sonatype.nexus",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:nexus:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:nexus:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:nexus:nexus:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:sonatype:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:sonatype:nexus:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins package identified via pkg type",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "java language - multi tier manifest fields",
			p: pkg.Package{
				Name:         "cxf-rt-bindings-xml",
				Version:      "3.3.10",
				FoundBy:      "java-cataloger",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata: pkg.JavaMetadata{
					VirtualPath: "/opt/jboss/keycloak/modules/system/layers/base/org/apache/cxf/impl/main/cxf-rt-bindings-xml-3.3.10.jar",
					Manifest: &pkg.JavaManifest{
						Main: map[string]string{
							"Automatic-Module-Name":    "org.apache.cxf.binding.xml",
							"Bnd-LastModified":         "1615836524860",
							"Build-Jdk":                "1.8.0_261",
							"Built-By":                 "dkulp",
							"Bundle-ActivationPolicy":  "lazy",
							"Bundle-Description":       "Apache CXF Runtime XML Binding",
							"Bundle-DocURL":            "http://cxf.apache.org",
							"Bundle-License":           "https://www.apache.org/licenses/LICENSE-2.0.txt",
							"Bundle-ManifestVersion":   "2",
							"Bundle-Name":              "Apache CXF Runtime XML Binding",
							"Bundle-SymbolicName":      "org.apache.cxf.cxf-rt-bindings-xml",
							"Bundle-Vendor":            "The Apache Software Foundation",
							"Bundle-Version":           "3.3.10",
							"Created-By":               "Apache Maven Bundle Plugin",
							"Export-Package":           "org.apache.cxf.binding.xml;version=\"3.3.10\",org.apache.cxf.binding.xml.wsdl11;version=\"3.3.10\",org.apache.cxf.binding.xml.interceptor;version=\"3.3.10\",org.apache.cxf.bindings.xformat;version=\"3.3.10\"",
							"Implementation-Vendor":    "The Apache Software Foundation",
							"Implementation-Vendor-Id": "org.apache",
							"Implementation-Version":   "3.3.10",
							"Import-Package":           "javax.xml.bind;version=\"[0,3)\",javax.xml.bind.annotation;version=\"[0,3)\",javax.wsdl;resolution:=optional,javax.wsdl.extensions;resolution:=optional,javax.wsdl.extensions.http;resolution:=optional,javax.xml.namespace,javax.xml.stream,org.apache.cxf;version=\"[3.3,4)\",org.apache.cxf.binding;version=\"[3.3,4)\",org.apache.cxf.binding.xml,org.apache.cxf.binding.xml.interceptor,org.apache.cxf.bindings.xformat,org.apache.cxf.common.i18n;version=\"[3.3,4)\",org.apache.cxf.common.injection;version=\"[3.3,4)\",org.apache.cxf.common.logging;version=\"[3.3,4)\",org.apache.cxf.common.util;version=\"[3.3,4)\",org.apache.cxf.endpoint;version=\"[3.3,4)\",org.apache.cxf.helpers;version=\"[3.3,4)\",org.apache.cxf.interceptor;version=\"[3.3,4)\",org.apache.cxf.message;version=\"[3.3,4)\",org.apache.cxf.service.model;version=\"[3.3,4)\",org.apache.cxf.staxutils;version=\"[3.3,4)\",org.apache.cxf.tools.common;version=\"[3.3,4)\";resolution:=optional,org.apache.cxf.tools.validator;version=\"[3.3,4)\";resolution:=optional,org.apache.cxf.transport;version=\"[3.3,4)\",org.apache.cxf.wsdl;version=\"[3.3,4)\";resolution:=optional,org.apache.cxf.wsdl.http;version=\"[3.3,4)\",org.apache.cxf.wsdl.interceptors;version=\"[3.3,4)\";resolution:=optional,org.w3c.dom",
							"Manifest-Version":         "1.0",
							"Require-Capability":       "osgi.ee;filter:=\"(&(osgi.ee=JavaSE)(version=1.8))\"",
							"Specification-Vendor":     "The Apache Software Foundation",
							"Specification-Version":    "3.3.10",
							"Tool":                     "Bnd-4.2.0.201903051501",
						},
					},
					PomProperties: &pkg.PomProperties{
						Path:       "META-INF/maven/org.apache.cxf/cxf-rt-bindings-xml/pom.properties",
						GroupID:    "org.apache.cxf",
						ArtifactID: "cxf-rt-bindings-xml",
						Version:    "3.3.10",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:apache-software-foundation:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache-software-foundation:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache-software-foundation:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache_software_foundation:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache_software_foundation:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:apache_software_foundation:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings-xml:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings-xml:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings-xml:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt-bindings:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf-rt:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings_xml:cxf-rt-bindings-xml:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings_xml:cxf:3.3.10:*:*:*:*:*:*:*",
				"cpe:2.3:a:cxf_rt_bindings_xml:cxf_rt_bindings_xml:3.3.10:*:*:*:*:*:*:*",
			},
		},
		{
			name: "rpm vendor selection",
			p: pkg.Package{
				Name:         "name",
				Version:      "3.2",
				FoundBy:      "some-analyzer",
				Type:         pkg.RpmPkg,
				MetadataType: pkg.RpmdbMetadataType,
				Metadata: pkg.RpmdbMetadata{
					Vendor: "some-vendor",
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:some-vendor:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:some_vendor:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "cloudbees jenkins package identified via groupId",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "com.cloudbees.jenkins.plugins",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jenkins:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins.io package identified via groupId prefix",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "io.jenkins.plugins.name.something",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:name:something:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:something:name:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:something:something:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins.io package identified via groupId",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "io.jenkins.plugins",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins-ci.io package identified via groupId",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "io.jenkins-ci.plugins",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins-ci.org package identified via groupId",
			p: pkg.Package{
				Name:     "name",
				Version:  "3.2",
				FoundBy:  "some-analyzer",
				Language: pkg.Java,
				Type:     pkg.JenkinsPluginPkg,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID: "org.jenkins-ci.plugins",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:name:name:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jira-atlassian filtering",
			p: pkg.Package{
				Name:         "jira_client_core",
				Version:      "3.2",
				FoundBy:      "some-analyzer",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID:    "org.atlassian.jira",
						ArtifactID: "jira_client_core",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:atlassian:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:atlassian:jira_client_core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client-core:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client-core:jira:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client-core:jira_client_core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client:jira:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira-client:jira_client_core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira:jira_client_core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client:jira:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client:jira_client_core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client_core:jira-client-core:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client_core:jira:3.2:*:*:*:*:*:*:*",
				"cpe:2.3:a:jira_client_core:jira_client_core:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "jenkins filtering",
			p: pkg.Package{
				Name:         "cloudbees-installation-manager",
				Version:      "2.89.0.33",
				FoundBy:      "some-analyzer",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{
						GroupID:    "com.cloudbees.jenkins.modules",
						ArtifactID: "cloudbees-installation-manager",
					},
				},
			},
			expected: []string{
				"cpe:2.3:a:cloudbees-installation-manager:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees-installation-manager:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees-installation:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees-installation:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees_installation:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees_installation:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees_installation_manager:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:cloudbees_installation_manager:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:jenkins:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:jenkins:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:modules:cloudbees-installation-manager:2.89.0.33:*:*:*:*:*:*:*",
				"cpe:2.3:a:modules:cloudbees_installation_manager:2.89.0.33:*:*:*:*:*:*:*",
			},
		},
		{
			name: "go product and vendor candidates are wired up",
			p: pkg.Package{
				Name:     "github.com/someone/something",
				Version:  "3.2",
				FoundBy:  "go-cataloger",
				Language: pkg.Go,
				Type:     pkg.GoModulePkg,
			},
			expected: []string{
				"cpe:2.3:a:someone:something:3.2:*:*:*:*:*:*:*",
			},
		},
		{
			name: "generate no CPEs for indeterminate golang package name",
			p: pkg.Package{
				Name:     "github.com/what",
				Version:  "3.2",
				FoundBy:  "go-cataloger",
				Language: pkg.Go,
				Type:     pkg.GoModulePkg,
			},
			expected: []string{},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			actual := Generate(test.p)

			expectedCpeSet := set.NewStringSet(test.expected...)
			actualCpeSet := set.NewStringSet()
			for _, a := range actual {
				actualCpeSet.Add(a.BindToFmtString())
			}

			// extra = generated but not expected (actual minus expected)
			extra := strset.Difference(actualCpeSet, expectedCpeSet).List()
			sort.Strings(extra)
			if len(extra) > 0 {
				t.Errorf("found extra CPEs:")
			}
			// entries are logged in a form that can be pasted straight into an expected list
			for _, d := range extra {
				t.Logf("   %q,", d)
			}

			// missing = expected but not generated (expected minus actual)
			missing := strset.Difference(expectedCpeSet, actualCpeSet).List()
			sort.Strings(missing)
			if len(missing) > 0 {
				t.Errorf("missing CPEs:")
			}
			for _, d := range missing {
				t.Logf("   %q,", d)
			}
		})
	}
}
// TestCandidateProducts verifies the product candidates returned by candidateProducts for a handful of package
// types. Only the set of elements is compared, not their order.
func TestCandidateProducts(t *testing.T) {
	cases := []struct {
		p        pkg.Package
		expected []string
	}{
		{
			p:        pkg.Package{Name: "springframework", Type: pkg.JavaPkg},
			expected: []string{"spring_framework", "springsource_spring_framework" /* <-- known good names | default guess --> */, "springframework"},
		},
		{
			p: pkg.Package{
				Name:     "some-java-package-with-group-id",
				Type:     pkg.JavaPkg,
				Language: pkg.Java,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{GroupID: "com.apple.itunes"},
				},
			},
			expected: []string{"itunes", "some-java-package-with-group-id", "some_java_package_with_group_id"},
		},
		{
			p: pkg.Package{
				Name:     "some-jenkins-plugin",
				Type:     pkg.JenkinsPluginPkg,
				Language: pkg.Java,
				Metadata: pkg.JavaMetadata{
					PomProperties: &pkg.PomProperties{GroupID: "com.cloudbees.jenkins.plugins"},
				},
			},
			expected: []string{"some-jenkins-plugin", "some_jenkins_plugin", "jenkins"},
		},
		{
			p:        pkg.Package{Name: "handlebars.js", Type: pkg.NpmPkg},
			expected: []string{"handlebars" /* <-- known good names | default guess --> */, "handlebars.js"},
		},
		{
			p:        pkg.Package{Name: "RedCloth", Type: pkg.GemPkg},
			expected: []string{"redcloth_library" /* <-- known good names | default guess --> */, "RedCloth"},
		},
		{
			p:        pkg.Package{Name: "python-rrdtool", Type: pkg.PythonPkg},
			expected: []string{"rrdtool" /* <-- known good names | default guess --> */, "python-rrdtool", "python_rrdtool"},
		},
	}

	for _, tc := range cases {
		// the subtest name is derived from the full case so each one is distinguishable
		t.Run(fmt.Sprintf("%+v %+v", tc.p, tc.expected), func(t *testing.T) {
			assert.ElementsMatch(t, tc.expected, candidateProducts(tc.p))
		})
	}
}
// Test_generateSubSelections verifies the cumulative sub-selections produced for fields with zero, one, or
// several hyphen/underscore delimiters, including leading, trailing, repeated, and delimiter-only input.
func Test_generateSubSelections(t *testing.T) {
	cases := []struct {
		field    string
		expected []string
	}{
		{field: "jenkins", expected: []string{"jenkins"}},
		{field: "jenkins-ci", expected: []string{"jenkins", "jenkins-ci"}},
		{field: "jenkins--ci", expected: []string{"jenkins", "jenkins-ci"}},
		{field: "jenkins_ci_tools", expected: []string{"jenkins", "jenkins_ci", "jenkins_ci_tools"}},
		{field: "-jenkins", expected: []string{"jenkins"}},
		{field: "jenkins_", expected: []string{"jenkins"}},
		{field: "", expected: nil},
		{field: "-", expected: nil},
		{field: "_", expected: nil},
	}

	for _, tc := range cases {
		t.Run(tc.field, func(t *testing.T) {
			assert.ElementsMatch(t, tc.expected, generateSubSelections(tc.field))
		})
	}
}
// Test_addSeparatorVariations verifies that addDelimiterVariations supplements each hyphenated or underscored
// value with its counterpart and leaves delimiter-free values alone. The expectations do not include a
// delimiter-stripped form (e.g. "jenkinsci").
func Test_addSeparatorVariations(t *testing.T) {
	cases := []struct {
		input    []string
		expected []string
	}{
		{input: []string{"jenkins-ci"}, expected: []string{"jenkins-ci", "jenkins_ci"}},
		{input: []string{"jenkins_ci"}, expected: []string{"jenkins_ci", "jenkins-ci"}},
		{input: []string{"jenkins"}, expected: []string{"jenkins"}},
		{
			input:    []string{"jenkins-ci", "circle-ci"},
			expected: []string{"jenkins-ci", "jenkins_ci", "circle-ci", "circle_ci"},
		},
	}

	for _, tc := range cases {
		t.Run(strings.Join(tc.input, ","), func(t *testing.T) {
			candidates := newFieldCandidateSet(tc.input...)
			addDelimiterVariations(candidates)
			assert.ElementsMatch(t, tc.expected, candidates.values())
		})
	}
}

View File

@ -0,0 +1,59 @@
package cpe
import (
"net/url"
"strings"
)
// candidateProductForGo makes a best-effort attempt at deriving a single product name from a go module path.
// It prefers returning no product (an empty string) over returning a potentially nonsensical one.
//
// For golang.org and gopkg.in the whole path after the host is returned; for google.golang.org the first path
// element is returned; for any other host the second path element is returned when there is one.
func candidateProductForGo(name string) string {
	// a module path carries no scheme, but url.Parse needs one to separate the host from the path
	parsed, err := url.Parse("http://" + name)
	if err != nil {
		return ""
	}

	trimmed := strings.Trim(parsed.Path, "/")
	segments := strings.Split(trimmed, "/")

	if parsed.Host == "golang.org" || parsed.Host == "gopkg.in" {
		return trimmed
	}
	if parsed.Host == "google.golang.org" {
		// strings.Split always yields at least one element, so this index is safe
		return segments[0]
	}
	if len(segments) >= 2 {
		return segments[1]
	}
	return ""
}
// candidateVendorForGo makes a best-effort attempt at deriving a single vendor name from a go module path.
// It prefers returning no vendor (an empty string) over returning a potentially nonsensical one.
//
// A few hosts map to a fixed answer (google.golang.org, golang.org, gopkg.in); for any other host the first
// path element is returned, provided the path has at least two elements.
func candidateVendorForGo(name string) string {
	// a module path carries no scheme, but url.Parse needs one to separate the host from the path
	parsed, err := url.Parse("http://" + name)
	if err != nil {
		return ""
	}

	// hosts with a predetermined vendor; gopkg.in deliberately yields no vendor
	fixedVendors := map[string]string{
		"google.golang.org": "google",
		"golang.org":        "golang",
		"gopkg.in":          "",
	}
	if vendor, known := fixedVendors[parsed.Host]; known {
		return vendor
	}

	segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
	if len(segments) >= 2 {
		return segments[0]
	}
	return ""
}

View File

@ -0,0 +1,99 @@
package cpe
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestCandidateProductForGo verifies the product name derived from a range of go module paths, including
// paths that are too short to yield a product.
func TestCandidateProductForGo(t *testing.T) {
	cases := []struct {
		pkg      string
		expected string
	}{
		{pkg: "github.com/someone/something", expected: "something"},
		{pkg: "golang.org/x/xerrors", expected: "x/xerrors"},
		{pkg: "gopkg.in/yaml.v2", expected: "yaml.v2"},
		{pkg: "place", expected: ""},
		{pkg: "place.com/", expected: ""},
		{pkg: "place.com/someone-or-thing", expected: ""},
		{pkg: "google.golang.org/genproto/googleapis/rpc/status", expected: "genproto"},
		{pkg: "github.com/someone/something/long/package/name", expected: "something"},
	}

	for _, tc := range cases {
		t.Run(tc.pkg, func(t *testing.T) {
			assert.Equal(t, tc.expected, candidateProductForGo(tc.pkg))
		})
	}
}
// TestCandidateVendorForGo verifies the vendor name derived from a range of go module paths, including
// paths for which no vendor is expected.
func TestCandidateVendorForGo(t *testing.T) {
	cases := []struct {
		pkg      string
		expected string
	}{
		{pkg: "github.com/someone/something", expected: "someone"},
		{pkg: "golang.org/x/xerrors", expected: "golang"},
		{pkg: "gopkg.in/yaml.v2", expected: ""},
		{pkg: "place", expected: ""},
		{pkg: "place.com/", expected: ""},
		{pkg: "place.com/someone-or-thing", expected: ""},
		{pkg: "google.golang.org/genproto/googleapis/rpc/status", expected: "google"},
		{pkg: "github.com/someone/something/long/package/name", expected: "someone"},
	}

	for _, tc := range cases {
		t.Run(tc.pkg, func(t *testing.T) {
			assert.Equal(t, tc.expected, candidateVendorForGo(tc.pkg))
		})
	}
}

View File

@ -0,0 +1,283 @@
package cpe
import (
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/scylladb/go-set/strset"
)
var (
// forbiddenProductGroupIDFields holds group ID segments (matched lower-cased) that are never offered as
// product candidates.
forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
// forbiddenVendorGroupIDFields holds group ID segments (matched lower-cased) that are never offered as
// vendor candidates.
forbiddenVendorGroupIDFields = strset.New("plugin", "plugins")
// domains are the prefixes startsWithTopLevelDomain checks a value against.
domains = []string{
"com",
"org",
"net",
"io",
}
// primaryJavaManifestGroupIDFields are the manifest keys consulted first when looking for group IDs.
primaryJavaManifestGroupIDFields = []string{
"Extension-Name",
"Specification-Vendor",
"Implementation-Vendor",
"Bundle-SymbolicName",
"Implementation-Vendor-Id",
"Implementation-Title",
"Bundle-Activator",
}
// secondaryJavaManifestGroupIDFields are the manifest keys consulted only when the primary keys yield no
// group IDs.
secondaryJavaManifestGroupIDFields = []string{
"Automatic-Module-Name",
"Main-Class",
"Package",
}
// javaManifestNameFields are the manifest keys whose values are treated as vendor names.
javaManifestNameFields = []string{
"Specification-Vendor",
"Implementation-Vendor",
}
)
// candidateProductsForJava returns product candidates for a java package, derived from its artifact ID and
// every group ID that can be found in its metadata.
func candidateProductsForJava(p pkg.Package) []string {
	artifactID := artifactIDFromJavaPackage(p)
	groupIDs := groupIDsFromJavaPackage(p)
	return productsFromArtifactAndGroupIDs(artifactID, groupIDs)
}
// candidateVendorsForJava returns vendor candidates for a java package by combining the vendors derived from
// its group IDs with the vendor names found in its manifest.
func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
	return newFieldCandidateFromSets(
		vendorsFromGroupIDs(groupIDsFromJavaPackage(p)),
		vendorsFromJavaManifestNames(p),
	)
}
// vendorsFromJavaManifestNames collects vendor candidates from the manifest keys listed in
// javaManifestNameFields, looking in the main section and in every named section. Values that look like a
// group ID (per startsWithTopLevelDomain) are skipped; the rest are passed through normalizePersonName and
// added with sub-selections disallowed. An empty set is returned when the package has no java metadata or
// no manifest.
func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
	vendors := newFieldCandidateSet()

	metadata, isJava := p.Metadata.(pkg.JavaMetadata)
	if !isJava || metadata.Manifest == nil {
		return vendors
	}

	// collect adds the value stored under key in the given section, if any. Looking a key up in a nil map is
	// safe in go, so nil sections need no special handling.
	collect := func(section map[string]string, key string) {
		value, found := section[key]
		if !found || startsWithTopLevelDomain(value) {
			return
		}
		vendors.add(fieldCandidate{
			value:                 normalizePersonName(value),
			disallowSubSelections: true,
		})
	}

	for _, key := range javaManifestNameFields {
		collect(metadata.Manifest.Main, key)
		for _, section := range metadata.Manifest.NamedSections {
			collect(section, key)
		}
	}

	return vendors
}
// vendorsFromGroupIDs derives vendor candidates from the dot-separated segments of each group ID. The first
// segment is always skipped, as are blank segments and segments listed in forbiddenVendorGroupIDFields.
func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
	vendors := newFieldCandidateSet()

	for _, groupID := range groupIDs {
		segments := strings.Split(groupID, ".")
		// start at 1: the leading segment is never a vendor candidate
		for idx := 1; idx < len(segments); idx++ {
			segment := strings.TrimSpace(segments[idx])
			if segment == "" || forbiddenVendorGroupIDFields.Has(strings.ToLower(segment)) {
				continue
			}
			vendors.addValue(segment)
		}
	}

	return vendors
}
// productsFromArtifactAndGroupIDs returns product candidates made up of the artifact ID (when non-empty) plus
// selected segments of the given group IDs. Only the third segment onwards is considered, and blank segments
// and those in forbiddenProductGroupIDFields are ignored. When an artifact ID is present, a segment is kept
// only if the artifact ID starts or ends with it and neither the artifact ID nor the group ID mentions
// "plugin"; without an artifact ID every remaining segment is kept.
func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
	products := strset.New()
	if artifactID != "" {
		products.Add(artifactID)
	}

	for _, groupID := range groupIDs {
		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")

		segments := strings.Split(groupID, ".")
		// the first two segments are never product candidates
		for idx := 2; idx < len(segments); idx++ {
			segment := strings.TrimSpace(segments[idx])

			// skip blanks and segments that only imply the package is a plugin or client
			if segment == "" || forbiddenProductGroupIDFields.Has(strings.ToLower(segment)) {
				continue
			}

			// umbrella projects tend to have sub components whose names start or end with the project name; a
			// segment matching that pattern may name the umbrella project rather than carry auxiliary info.
			couldBeProjectName := strings.HasPrefix(artifactID, segment) || strings.HasSuffix(artifactID, segment)
			if artifactID != "" && (!couldBeProjectName || isPlugin) {
				continue
			}

			products.Add(segment)
		}
	}

	return products.List()
}
// artifactIDFromJavaPackage returns the whitespace-trimmed artifact ID from the package's pom properties. An
// empty string is returned when there is no java metadata, no pom properties, or when the artifact ID looks
// like a group ID (starts with a top level domain and contains a ".").
func artifactIDFromJavaPackage(p pkg.Package) string {
	metadata, isJava := p.Metadata.(pkg.JavaMetadata)
	if !isJava || metadata.PomProperties == nil {
		return ""
	}

	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)

	// a strong indication that the artifact ID field really holds a group ID: don't use it
	looksLikeGroupID := startsWithTopLevelDomain(artifactID) && strings.Contains(artifactID, ".")
	if looksLikeGroupID {
		return ""
	}

	return artifactID
}
// groupIDsFromJavaPackage gathers every group ID found in the package's java metadata, in this order: pom
// properties, pom project, then manifest. It returns nil when the package carries no java metadata.
func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
	metadata, isJava := p.Metadata.(pkg.JavaMetadata)
	if !isJava {
		return nil
	}

	perSource := [][]string{
		groupIDsFromPomProperties(metadata.PomProperties),
		groupIDsFromPomProject(metadata.PomProject),
		groupIDsFromJavaManifest(metadata.Manifest),
	}
	for _, found := range perSource {
		groupIDs = append(groupIDs, found...)
	}

	return groupIDs
}
// groupIDsFromPomProperties returns the group IDs that can be derived from the given pom properties: the
// group ID itself when it starts with a top level domain, and the artifact ID when it looks like a group ID
// (starts with a top level domain and contains a "."). It returns nil when properties is nil.
//
// Both fields are trimmed of surrounding whitespace before they are inspected, so a padded value is judged on
// the same text that ends up being returned (matching how artifactIDFromJavaPackage treats the artifact ID).
func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string) {
	if properties == nil {
		return nil
	}

	groupID := strings.TrimSpace(properties.GroupID)
	if startsWithTopLevelDomain(groupID) {
		groupIDs = append(groupIDs, groupID)
	}

	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
	artifactID := strings.TrimSpace(properties.ArtifactID)
	if startsWithTopLevelDomain(artifactID) && strings.Contains(artifactID, ".") {
		// there is a strong indication that the artifact ID is really a group ID
		groupIDs = append(groupIDs, artifactID)
	}

	return groupIDs
}
// groupIDsFromPomProject returns the group IDs derived from the pom project followed by those derived from
// its parent project, when there is one. It returns nil when project is nil.
func groupIDsFromPomProject(project *pkg.PomProject) (groupIDs []string) {
	if project == nil {
		return nil
	}

	// the project's own coordinates come first...
	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)

	// ...followed by the coordinates of the parent project
	if parent := project.Parent; parent != nil {
		groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(parent.GroupID, parent.ArtifactID)...)
	}

	return groupIDs
}
// addGroupIDsFromGroupIDsAndArtifactID returns the group IDs that can be derived from a groupID/artifactID
// pair: the group ID itself when it starts with a top level domain, and the artifact ID when it looks like a
// group ID (starts with a top level domain and contains a ".").
//
// Both values are trimmed of surrounding whitespace before they are inspected, so a padded value is judged on
// the same text that ends up being returned.
func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
	groupID = strings.TrimSpace(groupID)
	if startsWithTopLevelDomain(groupID) {
		groupIDs = append(groupIDs, groupID)
	}

	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
	artifactID = strings.TrimSpace(artifactID)
	if startsWithTopLevelDomain(artifactID) && strings.Contains(artifactID, ".") {
		// there is a strong indication that the artifact ID is really a group ID
		groupIDs = append(groupIDs, artifactID)
	}

	return groupIDs
}
// groupIDsFromJavaManifest returns group-ID-like values found in the manifest. The keys in
// primaryJavaManifestGroupIDFields are tried first; only when they produce nothing are the keys in
// secondaryJavaManifestGroupIDFields consulted. It returns nil when manifest is nil.
func groupIDsFromJavaManifest(manifest *pkg.JavaManifest) []string {
	if manifest == nil {
		return nil
	}

	// the common manifest fields are the preferred source of candidates
	if primary := getManifestFieldGroupIDs(manifest, primaryJavaManifestGroupIDFields); len(primary) != 0 {
		return primary
	}

	// last ditch effort: attempt to get group-id-like info from the secondary MANIFEST.MF fields (such as
	// "Automatic-Module-Name").
	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
	return getManifestFieldGroupIDs(manifest, secondaryJavaManifestGroupIDFields)
}
// getManifestFieldGroupIDs returns, for each of the given manifest keys in turn, the value from the main
// section followed by the values from the named sections, keeping only values that start with a top level
// domain. It returns nil when manifest is nil.
func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
	if manifest == nil {
		return nil
	}

	for _, key := range fields {
		if value, found := manifest.Main[key]; found && startsWithTopLevelDomain(value) {
			groupIDs = append(groupIDs, value)
		}

		for _, section := range manifest.NamedSections {
			if value, found := section[key]; found && startsWithTopLevelDomain(value) {
				groupIDs = append(groupIDs, value)
			}
		}
	}

	return groupIDs
}
// startsWithTopLevelDomain returns the result of internal.HasAnyOfPrefixes for the given value against the
// entries of domains ("com", "org", "net", "io"). Callers in this file use it to decide whether a value looks
// like a reverse-domain group ID.
// NOTE(review): presumably a plain prefix match with no "." required after the domain, in which case values
// such as "commons-io" would also match; confirm against internal.HasAnyOfPrefixes.
func startsWithTopLevelDomain(value string) bool {
return internal.HasAnyOfPrefixes(value, domains...)
}

View File

@ -0,0 +1,428 @@
package cpe
import (
"strings"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert"
)
// Test_productsFromArtifactAndGroupIDs verifies which products are derived from combinations of group IDs and
// artifact IDs, including plugin-style group IDs that must not contribute extra products. Only the set of
// elements is compared, not their order.
func Test_productsFromArtifactAndGroupIDs(t *testing.T) {
	cases := []struct {
		groupIDs   []string
		artifactID string
		expected   []string
	}{
		{
			groupIDs:   []string{"org.sonatype.nexus"},
			artifactID: "nexus-extender",
			expected:   []string{"nexus", "nexus-extender"},
		},
		{
			groupIDs: []string{"org.sonatype.nexus"},
			expected: []string{"nexus"},
		},
		{
			groupIDs:   []string{"org.jenkins-ci.plugins"},
			artifactID: "ant",
			expected:   []string{"ant"},
		},
		{
			groupIDs:   []string{"org.jenkins-ci.plugins"},
			artifactID: "antisamy-markup-formatter",
			expected:   []string{"antisamy-markup-formatter"},
		},
		{
			groupIDs:   []string{"io.jenkins.plugins"},
			artifactID: "aws-global-configuration",
			expected:   []string{"aws-global-configuration"},
		},
		{
			groupIDs:   []string{"com.cloudbees.jenkins.plugins"},
			artifactID: "cloudbees-servicenow-jenkins-plugin",
			expected:   []string{"cloudbees-servicenow-jenkins-plugin"},
		},
		{
			groupIDs:   []string{"com.atlassian.confluence.plugins"},
			artifactID: "confluence-mobile-plugin",
			expected:   []string{"confluence-mobile-plugin"},
		},
		{
			groupIDs:   []string{"com.atlassian.confluence.plugins"},
			artifactID: "confluence-view-file-macro",
			expected:   []string{"confluence-view-file-macro"},
		},
		{
			groupIDs:   []string{"com.google.guava"},
			artifactID: "failureaccess",
			expected:   []string{"failureaccess"},
		},
	}

	for _, tc := range cases {
		subtestName := strings.Join(tc.groupIDs, ",") + ":" + tc.artifactID
		t.Run(subtestName, func(t *testing.T) {
			products := productsFromArtifactAndGroupIDs(tc.artifactID, tc.groupIDs)
			assert.ElementsMatch(t, tc.expected, products, "different products")
		})
	}
}
// Test_candidateProductsForJava checks the product candidates returned by candidateProductsForJava for packages
// whose pom properties carry a groupID-like value in the ArtifactID field. Results are compared without regard
// to order.
func Test_candidateProductsForJava(t *testing.T) {
tests := []struct {
name string
pkg pkg.Package
expected []string
}{
{
// the same dotted value appears in both the GroupID and ArtifactID fields
name: "duplicate groupID in artifactID field",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "org.sonatype.nexus",
ArtifactID: "org.sonatype.nexus",
},
},
},
expected: []string{"nexus"},
},
{
// only the ArtifactID field is populated, with a dotted groupID-like value
name: "detect groupID-like value in artifactID field",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
ArtifactID: "org.sonatype.nexus",
},
},
},
expected: []string{"nexus"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual := candidateProductsForJava(test.pkg)
assert.ElementsMatch(t, test.expected, actual, "different products")
})
}
}
// Test_vendorsFromGroupIDs checks the vendor candidate values produced by vendorsFromGroupIDs for a single
// group ID per case. Only the candidate values (via values()) are compared, without regard to order.
func Test_vendorsFromGroupIDs(t *testing.T) {
tests := []struct {
groupID string
expected []string
}{
{
groupID: "org.sonatype.nexus",
expected: []string{"sonatype", "nexus"},
},
{
groupID: "org.jenkins-ci.plugins",
expected: []string{"jenkins-ci"},
},
{
groupID: "io.jenkins.plugins",
expected: []string{"jenkins"},
},
{
groupID: "com.cloudbees.jenkins.plugins",
expected: []string{"cloudbees", "jenkins"},
},
{
groupID: "com.atlassian.confluence.plugins",
expected: []string{"atlassian", "confluence"},
},
{
groupID: "com.google.guava",
expected: []string{"google", "guava"},
},
}
for _, test := range tests {
// each case is named after the group ID under test
t.Run(test.groupID, func(t *testing.T) {
assert.ElementsMatch(t, test.expected, vendorsFromGroupIDs([]string{test.groupID}).values(), "different vendors")
})
}
}
// Test_groupIDsFromJavaPackage checks which group IDs groupIDsFromJavaPackage extracts from a package's pom
// properties and java manifest (main and named sections). Results are compared without regard to order.
func Test_groupIDsFromJavaPackage(t *testing.T) {
tests := []struct {
name string
pkg pkg.Package
expects []string
}{
{
name: "go case",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
GroupID: "io.jenkins-ci.plugin.thing",
},
},
},
expects: []string{"io.jenkins-ci.plugin.thing"},
},
{
// a groupID-like value found in the ArtifactID field is expected to be reported as a group ID
name: "from artifactID",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
ArtifactID: "io.jenkins-ci.plugin.thing",
},
},
},
expects: []string{"io.jenkins-ci.plugin.thing"},
},
{
name: "from main Extension-Name field",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
Main: map[string]string{
"Extension-Name": "io.jenkins-ci.plugin.thing",
},
},
},
},
expects: []string{"io.jenkins-ci.plugin.thing"},
},
{
name: "from named section Extension-Name field",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
NamedSections: map[string]map[string]string{
"section": {
"Extension-Name": "io.jenkins-ci.plugin.thing",
},
},
},
},
},
expects: []string{"io.jenkins-ci.plugin.thing"},
},
{
// both tier 1 and tier 2 fields are present; only the tier 1 values are expected in the result
name: "from main field - tier 1",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
Main: map[string]string{
// positive cases
// tier 1
"Extension-Name": "io.jenkins-ci.plugin.1",
"Specification-Vendor": "io.jenkins-ci.plugin.2",
"Implementation-Vendor": "io.jenkins-ci.plugin.3",
"Bundle-SymbolicName": "io.jenkins-ci.plugin.4",
"Implementation-Vendor-Id": "io.jenkins-ci.plugin.5",
"Implementation-Title": "io.jenkins-ci.plugin.6",
"Bundle-Activator": "io.jenkins-ci.plugin.7",
// tier 2
"Automatic-Module-Name": "io.jenkins-ci.plugin.8",
"Main-Class": "io.jenkins-ci.plugin.9",
"Package": "io.jenkins-ci.plugin.10",
},
},
},
},
expects: []string{
"io.jenkins-ci.plugin.1",
"io.jenkins-ci.plugin.2",
"io.jenkins-ci.plugin.3",
"io.jenkins-ci.plugin.4",
"io.jenkins-ci.plugin.5",
"io.jenkins-ci.plugin.6",
"io.jenkins-ci.plugin.7",
},
},
{
// only tier 2 fields are present, so the tier 2 values are expected in the result
name: "from main field - tier 2",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
Main: map[string]string{
// positive cases
"Automatic-Module-Name": "io.jenkins-ci.plugin.8",
"Main-Class": "io.jenkins-ci.plugin.9",
"Package": "io.jenkins-ci.plugin.10",
},
},
},
},
expects: []string{
"io.jenkins-ci.plugin.8",
"io.jenkins-ci.plugin.9",
"io.jenkins-ci.plugin.10",
},
},
{
// a known field with a non-groupID-like value and an unknown field with a groupID-like value: neither is expected
name: "from main field - negative cases",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
Main: map[string]string{
// negative cases
"Extension-Name": "not.a-group.id",
"bogus": "io.jenkins-ci.plugin.please-dont-find-me",
},
},
},
},
expects: nil,
},
{
// same tier 1 / tier 2 mix as the main-section case, but placed within a named section
name: "from named section field - tier 1",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
NamedSections: map[string]map[string]string{
"section": {
// positive cases
// tier 1
"Extension-Name": "io.jenkins-ci.plugin.1",
"Specification-Vendor": "io.jenkins-ci.plugin.2",
"Implementation-Vendor": "io.jenkins-ci.plugin.3",
"Bundle-SymbolicName": "io.jenkins-ci.plugin.4",
"Implementation-Vendor-Id": "io.jenkins-ci.plugin.5",
"Implementation-Title": "io.jenkins-ci.plugin.6",
"Bundle-Activator": "io.jenkins-ci.plugin.7",
// tier 2
"Automatic-Module-Name": "io.jenkins-ci.plugin.8",
"Main-Class": "io.jenkins-ci.plugin.9",
"Package": "io.jenkins-ci.plugin.10",
},
},
},
},
},
expects: []string{
"io.jenkins-ci.plugin.1",
"io.jenkins-ci.plugin.2",
"io.jenkins-ci.plugin.3",
"io.jenkins-ci.plugin.4",
"io.jenkins-ci.plugin.5",
"io.jenkins-ci.plugin.6",
"io.jenkins-ci.plugin.7",
},
},
{
name: "from named section field - negative cases",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
NamedSections: map[string]map[string]string{
"section": {
// negative cases
"Extension-Name": "not.a-group.id",
"bogus": "io.jenkins-ci.plugin.please-dont-find-me",
},
},
},
},
},
expects: nil,
},
{
// java metadata is present but has neither a manifest nor pom properties
name: "no manifest or pom info",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{},
},
expects: nil,
},
{
// the package has no metadata at all
name: "no java info",
pkg: pkg.Package{},
expects: nil,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expects, groupIDsFromJavaPackage(test.pkg))
})
}
}
// Test_artifactIDFromJavaPackage checks the artifact ID that artifactIDFromJavaPackage returns for a package,
// including the cases where an empty string is expected.
func Test_artifactIDFromJavaPackage(t *testing.T) {
tests := []struct {
name string
pkg pkg.Package
expects string
}{
{
name: "go case",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
ArtifactID: "cloudbees-installation-manager",
},
},
},
expects: "cloudbees-installation-manager",
},
{
// a dotted, groupID-like ArtifactID value is expected to be discarded
name: "ignore groupID-like things",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{
ArtifactID: "io.jenkins-ci.plugin.thing",
},
},
},
expects: "",
},
{
// the package has no metadata at all
name: "no java info",
pkg: pkg.Package{},
expects: "",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expects, artifactIDFromJavaPackage(test.pkg))
})
}
}
// Test_vendorsFromJavaManifestNames checks the vendor candidate values that vendorsFromJavaManifestNames derives
// from name-like fields within java manifest named sections. Only the candidate values (via values()) are
// compared, without regard to order.
func Test_vendorsFromJavaManifestNames(t *testing.T) {
tests := []struct {
name string
pkg pkg.Package
expects []string
}{
{
name: "from manifest named section fields",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
NamedSections: map[string]map[string]string{
"section": {
// positive cases
"Specification-Vendor": "Alex Goodman",
"Implementation-Vendor": "William Goodman",
},
},
},
},
},
// the expected values are lower-cased with spaces replaced by underscores
expects: []string{"alex_goodman", "william_goodman"},
},
{
// a groupID-like value in a vendor field and a person name in a different field: neither is expected
name: "from manifest named section fields - negative cases",
pkg: pkg.Package{
Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{
NamedSections: map[string]map[string]string{
"section": {
// negative cases
"Specification-Vendor": "io.jenkins-ci.plugin.thing",
"Implementation-Vendor-ID": "William Goodman",
},
},
},
},
},
expects: nil,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expects, vendorsFromJavaManifestNames(test.pkg).values())
})
}
}

View File

@ -0,0 +1,29 @@
package cpe
import "github.com/anchore/syft/syft/pkg"
// candidateVendorsForPython builds vendor candidates from the author fields of python package metadata.
// It returns nil when the package does not carry pkg.PythonPackageMetadata; otherwise it returns a candidate
// set holding the normalized author name and/or the normalized local part of the author email (each only
// when the corresponding field is non-empty).
func candidateVendorsForPython(p pkg.Package) fieldCandidateSet {
	metadata, isPython := p.Metadata.(pkg.PythonPackageMetadata)
	if !isPython {
		return nil
	}

	candidates := newFieldCandidateSet()

	if author := metadata.Author; author != "" {
		// the author name is used as-is: no sub-selections and no delimiter variations
		candidates.add(fieldCandidate{
			value:                       normalizePersonName(author),
			disallowSubSelections:       true,
			disallowDelimiterVariations: true,
		})
	}

	if email := metadata.AuthorEmail; email != "" {
		// only the portion of the email before the "@" is considered
		candidates.add(fieldCandidate{
			value:                 normalizePersonName(stripEmailSuffix(email)),
			disallowSubSelections: true,
		})
	}

	return candidates
}

View File

@ -0,0 +1,21 @@
package cpe
import "github.com/anchore/syft/syft/pkg"
// candidateVendorsForRPM builds vendor candidates from the vendor field of RPM DB metadata. It returns nil
// when the package does not carry pkg.RpmdbMetadata; otherwise it returns a candidate set that contains the
// normalized vendor (only when the vendor field is non-empty).
func candidateVendorsForRPM(p pkg.Package) fieldCandidateSet {
	metadata, isRPM := p.Metadata.(pkg.RpmdbMetadata)
	if !isRPM {
		return nil
	}

	candidates := newFieldCandidateSet()
	if metadata.Vendor == "" {
		return candidates
	}

	candidates.add(fieldCandidate{
		value:                 normalizeName(metadata.Vendor),
		disallowSubSelections: true,
	})
	return candidates
}

View File

@ -0,0 +1,21 @@
package cpe
import "github.com/anchore/syft/syft/pkg"
// candidateVendorsForRuby builds vendor candidates from the authors listed in gem metadata. It returns nil
// when the package does not carry pkg.GemMetadata; otherwise it returns a candidate set with one normalized
// entry per author.
func candidateVendorsForRuby(p pkg.Package) fieldCandidateSet {
	metadata, isGem := p.Metadata.(pkg.GemMetadata)
	if !isGem {
		return nil
	}

	candidates := newFieldCandidateSet()
	for i := range metadata.Authors {
		// an author entry may be a name or an email, so anything from the "@" onward is dropped before normalizing
		authorName := normalizePersonName(stripEmailSuffix(metadata.Authors[i]))
		candidates.add(fieldCandidate{
			value:                 authorName,
			disallowSubSelections: true,
		})
	}
	return candidates
}

View File

@ -1,4 +1,4 @@
package cataloger
package cpe
import (
"sort"
@ -6,15 +6,15 @@ import (
"github.com/facebookincubator/nvdtools/wfn"
)
var _ sort.Interface = (*ByCPESpecificity)(nil)
var _ sort.Interface = (*BySpecificity)(nil)
type ByCPESpecificity []wfn.Attributes
type BySpecificity []wfn.Attributes
func (c ByCPESpecificity) Len() int { return len(c) }
func (c BySpecificity) Len() int { return len(c) }
func (c ByCPESpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c BySpecificity) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c ByCPESpecificity) Less(i, j int) bool {
func (c BySpecificity) Less(i, j int) bool {
iScore := weightedCountForSpecifiedFields(c[i])
jScore := weightedCountForSpecifiedFields(c[j])

View File

@ -1,4 +1,4 @@
package cataloger
package cpe
import (
"sort"
@ -84,7 +84,7 @@ func TestCPESpecificity(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
sort.Sort(ByCPESpecificity(test.input))
sort.Sort(BySpecificity(test.input))
assert.Equal(t, test.expected, test.input)
})
}

View File

@ -0,0 +1,21 @@
package cpe
import "strings"
// stripEmailSuffix returns the portion of the given string that precedes the first "@". When there is no "@"
// the input is returned unchanged.
func stripEmailSuffix(email string) string {
	if at := strings.Index(email, "@"); at >= 0 {
		return email[:at]
	}
	return email
}
// normalizePersonName lower-cases the given name, trims surrounding whitespace, replaces every hyphen, space,
// and period with an underscore, and finally drops a leading "the_" (if present).
func normalizePersonName(name string) string {
	lowered := strings.TrimSpace(strings.ToLower(name))
	underscored := strings.NewReplacer("-", "_", " ", "_", ".", "_").Replace(lowered)
	return strings.TrimPrefix(underscored, "the_")
}
// normalizeName keeps only the text before the first comma, lower-cases it, trims surrounding whitespace, and
// removes all remaining spaces.
func normalizeName(name string) string {
	if comma := strings.Index(name, ","); comma >= 0 {
		name = name[:comma]
	}
	trimmed := strings.TrimSpace(strings.ToLower(name))
	return strings.ReplaceAll(trimmed, " ", "")
}

View File

@ -0,0 +1,71 @@
package cpe
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Test_normalizeName checks that normalizeName lower-cases its input, removes spaces, and drops everything
// from the first comma onward, while leaving periods untouched.
func Test_normalizeName(t *testing.T) {
tests := []struct {
input string
expects string
}{
{
// note: extra spaces
input: " Alex Goodman ",
expects: "alexgoodman",
},
{
// the ", LLC" suffix is not expected in the result
input: "Alex Goodman, LLC",
expects: "alexgoodman",
},
{
// periods are expected to be preserved
input: "alex.goodman",
expects: "alex.goodman",
},
}
for _, test := range tests {
// each case is named after its input
t.Run(test.input, func(t *testing.T) {
assert.Equal(t, test.expects, normalizeName(test.input))
})
}
}
// Test_normalizePersonName checks that normalizePersonName lower-cases its input, trims surrounding whitespace,
// replaces spaces, periods, and hyphens with underscores, and drops a leading "the" word.
func Test_normalizePersonName(t *testing.T) {
	tests := []struct {
		input   string
		expects string
	}{
		{
			// note: extra spaces
			input:   "  Alex Goodman ",
			expects: "alex_goodman",
		},
		{
			input:   "Alex Goodman",
			expects: "alex_goodman",
		},
		{
			// periods are replaced with underscores
			input:   "Alex.Goodman",
			expects: "alex_goodman",
		},
		{
			// hyphens are replaced with underscores (this case replaces a duplicated "Alex.Goodman" entry,
			// which produced two subtests with the same name and left the hyphen replacement untested)
			input:   "Alex-Goodman",
			expects: "alex_goodman",
		},
		{
			input:   "AlexGoodman",
			expects: "alexgoodman",
		},
		{
			// a leading "The" is dropped
			input:   "The Apache Software Foundation",
			expects: "apache_software_foundation",
		},
	}
	for _, test := range tests {
		// each case is named after its input, so inputs must be unique
		t.Run(test.input, func(t *testing.T) {
			assert.Equal(t, test.expects, normalizePersonName(test.input))
		})
	}
}

View File

@ -1,503 +0,0 @@
package cataloger
import (
"bufio"
"bytes"
"fmt"
"net/url"
"sort"
"strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/facebookincubator/nvdtools/wfn"
)
// domains is the list of prefixes that startsWithDomain checks a value against when deciding whether the value
// looks like a maven group ID.
var domains = []string{
"com",
"org",
"net",
"io",
}
var (
// forbiddenProductGroupIDFields are group ID fields (compared lower-cased) that productsFromArtifactAndGroupIDs
// never adds as a product candidate.
forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
// forbiddenVendorGroupIDFields are group ID fields (compared lower-cased) that vendorsFromGroupIDs never adds
// as a vendor candidate.
forbiddenVendorGroupIDFields = strset.New("plugin", "plugins")
)
// productCandidatesByPkgType holds, per package type, additional product candidates keyed by package name.
// candidateProducts looks entries up by (package type, package name) and places any matches ahead of the
// generated product candidates.
var productCandidatesByPkgType = candidateStore{
pkg.JavaPkg: {
"springframework": []string{"spring_framework", "springsource_spring_framework"},
"spring-core": []string{"spring_framework", "springsource_spring_framework"},
},
pkg.NpmPkg: {
"hapi": []string{"hapi_server_framework"},
"handlebars.js": []string{"handlebars"},
"is-my-json-valid": []string{"is_my_json_valid"},
"mustache": []string{"mustache.js"},
},
pkg.GemPkg: {
"Arabic-Prawn": []string{"arabic_prawn"},
"bio-basespace-sdk": []string{"basespace_ruby_sdk"},
"cremefraiche": []string{"creme_fraiche"},
"html-sanitizer": []string{"html_sanitizer"},
"sentry-raven": []string{"raven-ruby"},
"RedCloth": []string{"redcloth_library"},
"VladTheEnterprising": []string{"vladtheenterprising"},
"yajl-ruby": []string{"yajl-ruby_gem"},
},
pkg.PythonPkg: {
"python-rrdtool": []string{"rrdtool"},
},
}
// candidateStore is a static mapping, per package type, of known package names (keys) to the official CPE
// names for each package.
type candidateStore map[pkg.Type]map[string][]string

// getCandidates returns the candidates registered for the given package type and key, or nil when either the
// package type or the key is not present in the store.
func (s candidateStore) getCandidates(t pkg.Type, key string) []string {
	// indexing a missing package type yields a nil inner map, and indexing a nil map yields a nil slice
	return s[t][key]
}
// newCPE returns a copy of the attributes produced by wfn.NewAttributesWithAny with the part set to "a" and the
// product, vendor, version, and target software fields set to the given values.
func newCPE(product, vendor, version, targetSW string) wfn.Attributes {
	attrs := *wfn.NewAttributesWithAny()

	attrs.Part = "a"
	attrs.Vendor = vendor
	attrs.Product = product
	attrs.Version = version
	attrs.TargetSW = targetSW

	return attrs
}
// filterCPEs returns the given CPEs (in their original order) minus every CPE for which at least one of the
// filter functions returns true when called with the CPE and the package. The result is nil when nothing is kept.
func filterCPEs(cpes []pkg.CPE, p pkg.Package, filters ...filterFn) (result []pkg.CPE) {
	// rejected reports whether any filter matches the candidate; evaluation stops at the first match
	rejected := func(candidate pkg.CPE) bool {
		for _, matches := range filters {
			if matches(candidate, p) {
				return true
			}
		}
		return false
	}

	for _, candidate := range cpes {
		if !rejected(candidate) {
			result = append(result, candidate)
		}
	}
	return result
}
// generatePackageCPEs creates a list of CPEs for the given package by combining every candidate product with
// every candidate vendor and every candidate target software value (plus wfn.Any). Duplicate combinations are
// dropped, known-bad combinations are removed via cpeFilters, and the result is sorted with ByCPESpecificity.
// It returns nil when there are no product candidates.
func generatePackageCPEs(p pkg.Package) []pkg.CPE {
	products := candidateProducts(p)
	if len(products) == 0 {
		// nothing to combine, so skip generating vendor and target software candidates altogether
		return nil
	}

	vendors := candidateVendors(p)

	// the set of target software values is the same for every product/vendor pair, so build it only once
	targetSws := append([]string{wfn.Any}, candidateTargetSoftwareAttrs(p)...)

	keys := internal.NewStringSet()
	cpes := make([]pkg.CPE, 0, len(products)*len(vendors)*len(targetSws))
	for _, product := range products {
		for _, vendor := range vendors {
			for _, targetSw := range targetSws {
				// prevent duplicate entries...
				key := fmt.Sprintf("%s|%s|%s|%s", product, vendor, p.Version, targetSw)
				if keys.Contains(key) {
					continue
				}
				keys.Add(key)

				// add a new entry...
				cpes = append(cpes, newCPE(product, vendor, p.Version, targetSw))
			}
		}
	}

	// filter out any known combinations that don't accurately represent this package
	cpes = filterCPEs(cpes, p, cpeFilters...)

	sort.Sort(ByCPESpecificity(cpes))

	return cpes
}
// candidateTargetSoftwareAttrs returns the target software candidates for the package based on its language,
// or nil when the language has no associated candidates.
func candidateTargetSoftwareAttrs(p pkg.Package) []string {
	// TODO: would be great to allow these to be overridden by user data/config
	switch p.Language {
	case pkg.Java:
		return candidateTargetSoftwareAttrsForJava(p)
	case pkg.JavaScript:
		return []string{"node.js", "nodejs"}
	case pkg.Ruby:
		return []string{"ruby", "rails"}
	case pkg.Python:
		return []string{"python"}
	case pkg.Go:
		return []string{"go", "golang"}
	}
	return nil
}
// candidateTargetSoftwareAttrsForJava returns the jenkins-specific target software candidates for jenkins
// plugin packages and the generic java candidates for every other package type.
func candidateTargetSoftwareAttrsForJava(p pkg.Package) []string {
	attrs := []string{"java", "maven"}
	if p.Type == pkg.JenkinsPluginPkg {
		// use the more specific indicator if available
		attrs = []string{"jenkins", "cloudbees_jenkins"}
	}
	return attrs
}
// candidateVendors returns the vendor candidates for the given package. The product candidates are used as the
// starting set, language-specific candidates are then added (or, for Go, replace the set entirely), separator
// variations are added, and finally sub-selections are generated for every candidate.
func candidateVendors(p pkg.Package) []string {
	// TODO: Confirm whether using products as vendors is helpful to the matching process
	candidates := strset.New(candidateProducts(p)...)

	switch lang := p.Language; {
	case lang == pkg.Ruby:
		candidates.Add("ruby-lang")
	case lang == pkg.Java && p.MetadataType == pkg.JavaMetadataType:
		candidates.Add(candidateVendorsForJava(p)...)
	case lang == pkg.Go:
		// replace all candidates with only the golang-specific helper
		candidates.Clear()
		if goVendor := candidateVendorForGo(p.Name); goVendor != "" {
			candidates.Add(goVendor)
		}
	}

	// try swapping hyphens for underscores and vice versa
	addSeparatorVariations(candidates)

	// generate sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci])
	return generateAllSubSelections(candidates.List())
}
// candidateProducts returns the product candidates for the given package. The package name is the starting
// candidate, language-specific candidates are then added (or, for Go, replace the set entirely), separator
// variations are added, and any statically known product names for the package are placed first in the result.
func candidateProducts(p pkg.Package) []string {
	name := p.Name
	candidates := strset.New(name)

	switch {
	case p.Language == pkg.Python:
		if !strings.HasPrefix(name, "python") {
			candidates.Add("python-" + name)
		}
	case p.Language == pkg.Java || p.MetadataType == pkg.JavaMetadataType:
		candidates.Add(candidateProductsForJava(p)...)
	case p.Language == pkg.Go:
		// replace all candidates with only the golang-specific helper
		candidates.Clear()
		if goProduct := candidateProductForGo(name); goProduct != "" {
			candidates.Add(goProduct)
		}
	}

	// try swapping hyphens for underscores and vice versa
	addSeparatorVariations(candidates)

	// known product name swaps are placed ahead of the generated candidates
	known := productCandidatesByPkgType.getCandidates(p.Type, name)
	return append(known, candidates.List()...)
}
// candidateProductForGo attempts to find a single product name from a go module name in a best-effort attempt.
// This implementation prefers to return no product (an empty string) over returning potentially nonsensical
// results.
func candidateProductForGo(name string) string {
	// note: url.Parse requires a scheme for correct processing, which a golang module will not have, so one is provided.
	parsed, err := url.Parse("http://" + name)
	if err != nil {
		return ""
	}

	modulePath := strings.Trim(parsed.Path, "/")
	segments := strings.Split(modulePath, "/")

	host := parsed.Host
	if host == "golang.org" || host == "gopkg.in" {
		// the whole path is the product for these hosts
		return modulePath
	}
	if host == "google.golang.org" {
		// strings.Split always returns at least one element, so this index is always valid
		return segments[0]
	}

	// for every other host the product is the second path element, if there is one
	if len(segments) >= 2 {
		return segments[1]
	}
	return ""
}
// candidateVendorForGo attempts to find a single vendor name from a go module name in a best-effort attempt.
// This implementation prefers to return no vendor (an empty string) over returning potentially nonsensical
// results.
func candidateVendorForGo(name string) string {
	// note: url.Parse requires a scheme for correct processing, which a golang module will not have, so one is provided.
	parsed, err := url.Parse("http://" + name)
	if err != nil {
		return ""
	}

	// a few well-known hosts map to a fixed vendor (or to no vendor at all)
	host := parsed.Host
	if host == "google.golang.org" {
		return "google"
	}
	if host == "golang.org" {
		return "golang"
	}
	if host == "gopkg.in" {
		return ""
	}

	// for every other host the vendor is the first path element, but only when a second element also exists
	segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
	if len(segments) >= 2 {
		return segments[0]
	}
	return ""
}
// candidateProductsForJava returns the product candidates derived from the artifact ID and group IDs found
// within the java metadata of the given package.
func candidateProductsForJava(p pkg.Package) []string {
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), groupIDsFromJavaPackage(p))
}
// candidateVendorsForJava returns the vendor candidates derived from the group IDs found within the java
// metadata of the given package.
func candidateVendorsForJava(p pkg.Package) []string {
return vendorsFromGroupIDs(groupIDsFromJavaPackage(p))
}
// vendorsFromGroupIDs returns vendor candidates taken from the dot-separated fields of each group ID. The first
// field of every group ID is never used, nor are empty fields or fields listed in forbiddenVendorGroupIDFields.
// Each remaining field contributes itself and its sub-selections.
func vendorsFromGroupIDs(groupIDs []string) []string {
	candidates := strset.New()
	for _, groupID := range groupIDs {
		// strings.Split always returns at least one element, so skipping the first field is always safe
		for _, rawField := range strings.Split(groupID, ".")[1:] {
			field := strings.TrimSpace(rawField)
			if field == "" || forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
				continue
			}

			// e.g. jenkins-ci -> [jenkins-ci, jenkins]
			candidates.Add(generateSubSelections(field)...)
		}
	}
	return candidates.List()
}
// productsFromArtifactAndGroupIDs returns product candidates made up of the artifact ID (when non-empty) plus
// selected dot-separated fields of each group ID. The first two fields of a group ID are never used, nor are
// empty fields or fields listed in forbiddenProductGroupIDFields. When an artifact ID is given, a field is only
// used if the artifact ID starts or ends with it and neither the artifact ID nor the group ID mentions "plugin".
func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
	candidates := strset.New()
	hasArtifactID := artifactID != ""
	if hasArtifactID {
		candidates.Add(artifactID)
	}

	for _, groupID := range groupIDs {
		fields := strings.Split(groupID, ".")
		if len(fields) <= 2 {
			// only fields after the second one are ever considered
			continue
		}

		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")

		for _, rawField := range fields[2:] {
			field := strings.TrimSpace(rawField)

			// don't add this field as a name if the name is implying the package is a plugin or client
			if field == "" || forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
				continue
			}

			if hasArtifactID {
				if isPlugin {
					continue
				}
				// umbrella projects tend to have sub components that either start or end with the project name.
				// We want to identify fields that may represent the umbrella project, and not fields that
				// indicate auxiliary information about the package.
				if !strings.HasPrefix(artifactID, field) && !strings.HasSuffix(artifactID, field) {
					continue
				}
			}

			candidates.Add(field)
		}
	}

	return candidates.List()
}
// artifactIDFromJavaPackage returns the whitespace-trimmed artifact ID from the pom properties of the given
// package. An empty string is returned when the package has no java metadata, has no pom properties, or when
// the artifact ID looks like a group ID (starts with a known domain and contains a period).
func artifactIDFromJavaPackage(p pkg.Package) string {
	metadata, isJava := p.Metadata.(pkg.JavaMetadata)
	if !isJava || metadata.PomProperties == nil {
		return ""
	}

	id := strings.TrimSpace(metadata.PomProperties.ArtifactID)

	looksLikeGroupID := startsWithDomain(id) && strings.Contains(id, ".")
	if looksLikeGroupID {
		// there is a strong indication that the artifact ID is really a group ID, don't use it
		return ""
	}
	return id
}
// groupIDsFromJavaPackage returns the group IDs found in the pom properties of the given package followed by
// those found in its java manifest. Nil is returned when the package has no java metadata.
func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
	if metadata, isJava := p.Metadata.(pkg.JavaMetadata); isJava {
		groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
		groupIDs = append(groupIDs, groupIDsFromJavaManifest(metadata.Manifest)...)
	}
	return groupIDs
}
// groupIDsFromPomProperties returns the group ID candidates found within the given pom properties: the GroupID
// field, and the ArtifactID field when it looks like a group ID. Nil is returned when properties is nil.
//
// Both fields are whitespace-trimmed BEFORE they are checked, which matches how artifactIDFromJavaPackage treats
// the artifact ID. Without this, a value with leading whitespace would be rejected here (the prefix check would
// fail) while also being discarded as "group ID-like" by artifactIDFromJavaPackage, dropping the value entirely.
func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string) {
	if properties == nil {
		return nil
	}

	if groupID := strings.TrimSpace(properties.GroupID); startsWithDomain(groupID) {
		groupIDs = append(groupIDs, groupID)
	}

	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
	artifactID := strings.TrimSpace(properties.ArtifactID)
	if startsWithDomain(artifactID) && strings.Contains(artifactID, ".") {
		// there is a strong indication that the artifact ID is really a group ID
		groupIDs = append(groupIDs, artifactID)
	}

	return groupIDs
}
// groupIDsFromJavaManifest returns group-id-like values taken from the "Extension-Name" and
// "Automatic-Module-Name" fields of the manifest main section and of every named section. Values are
// whitespace-trimmed and only kept when they start with a known domain. Nil is returned for a nil manifest.
func groupIDsFromJavaManifest(manifest *pkg.JavaManifest) (groupIDs []string) {
	if manifest == nil {
		return nil
	}

	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
	collect := func(fields map[string]string) {
		for name, value := range fields {
			if name != "Extension-Name" && name != "Automatic-Module-Name" {
				continue
			}
			if value = strings.TrimSpace(value); startsWithDomain(value) {
				groupIDs = append(groupIDs, value)
			}
		}
	}

	collect(manifest.Main)
	for _, section := range manifest.NamedSections {
		collect(section)
	}

	return groupIDs
}
// startsWithDomain returns the result of internal.HasAnyOfPrefixes for the given value checked against the
// entries of the domains list (presumably true when value begins with one of those entries; confirm against
// the helper's implementation).
func startsWithDomain(value string) bool {
return internal.HasAnyOfPrefixes(value, domains...)
}
// generateAllSubSelections returns the concatenation of the sub-selections generated for each of the given
// fields, in the order the fields are given. The result is nil when no sub-selections are produced.
func generateAllSubSelections(fields []string) (results []string) {
	for i := range fields {
		subSelections := generateSubSelections(fields[i])
		results = append(results, subSelections...)
	}
	return results
}
// generateSubSelections attempts to split a field by hyphens and underscores and return a list of sensible sub-selections
// that can be used as product or vendor candidates. Results are built up progressively, shortest first, by re-joining
// each part onto the previous result. E.g. jenkins-ci-tools -> [jenkins, jenkins-ci, jenkins-ci-tools].
func generateSubSelections(field string) (results []string) {
scanner := bufio.NewScanner(strings.NewReader(field))
scanner.Split(scanByHyphenOrUnderscore)
// lastToken is the final byte of the previous raw token, which is used to re-join the next candidate
var lastToken uint8
for scanner.Scan() {
rawCandidate := scanner.Text()
if len(rawCandidate) == 0 {
break
}
// trim any number of hyphen or underscore that is prefixed/suffixed on the given candidate. Since
// scanByHyphenOrUnderscore preserves delimiters (hyphens and underscores) at the end of each token, every
// token other than the final one carries a trailing delimiter.
candidate := strings.TrimFunc(rawCandidate, trimHyphenOrUnderscore)
// capture the result (if there is content)
if len(candidate) > 0 {
if len(results) > 0 {
// extend the previous result with the separator that preceded this candidate
results = append(results, results[len(results)-1]+string(lastToken)+candidate)
} else {
results = append(results, candidate)
}
}
// keep track of the trailing separator for the next loop
lastToken = rawCandidate[len(rawCandidate)-1]
}
// NOTE(review): scanner.Err() is not checked here; confirm that scan errors can be safely ignored
return results
}
// trimHyphenOrUnderscore is a character filter function for use with strings.TrimFunc that reports whether the
// given rune is a hyphen or an underscore.
func trimHyphenOrUnderscore(r rune) bool {
	return r == '-' || r == '_'
}
// scanByHyphenOrUnderscore is a bufio.SplitFunc that splits on hyphen or underscore and includes the separator
// at the end of each token. Any remaining data without a separator is returned as the final token at EOF.
func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// end is one past the first separator, or 0 when there is no separator in data
	end := bytes.IndexAny(data, "-_") + 1

	switch {
	case end > 0:
		return end, data[:end], nil
	case atEOF && len(data) > 0:
		return len(data), data, nil
	default:
		// either more data is needed or there is nothing left to return
		return 0, nil, nil
	}
}
// addSeparatorVariations adds, for every field currently in the given set, a variation with all hyphens
// replaced by underscores (when the field has a hyphen) and a variation with all underscores replaced by
// hyphens (when the field has an underscore). The set is modified in place.
func addSeparatorVariations(fields *strset.Set) {
	// iterate over a snapshot of the set so that additions are not themselves expanded
	for _, original := range fields.List() {
		if strings.Contains(original, "-") {
			// provide variations of hyphen candidates with an underscore
			fields.Add(strings.ReplaceAll(original, "-", "_"))
		}
		if strings.Contains(original, "_") {
			// provide variations of underscore candidates with a hyphen
			fields.Add(strings.ReplaceAll(original, "_", "-"))
		}
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -24,10 +24,10 @@ import (
// and case-insensitive, respectively). "Ungreedy" is important so that the '*' that trails the package name
// component doesn't consume the rest of the string.
//
// [[:alpha:]][[:word:]]* ... Matches any word, and the word can include "word" characters (
// which includes numbers and underscores), but the first character of the word MUST be a letter.
// [[:alpha:]][[:word:].]* ... Matches any word, and the word can include "word" characters (
// which includes numbers and underscores), and periods, but the first character of the word MUST be a letter.
//
// (?:\.[[:alpha:]][[:word:]]*)* ... This looks redundant, but it's not. It
// (?:\.[[:alpha:]][[:word:].]*)* ... This looks redundant, but it's not. It
// extends the previous pattern such that the net effect of both components is
// that words can also include a period and more words (thus, when combined, not
// only is "something" matched, but so is "com.prefix.thing"
@ -47,7 +47,7 @@ import (
// my-http2-server-5 --> name="my-http2-server", version="5"
// jetpack-build235-rc5 --> name="jetpack", version="build235-rc5"
// ironman-r4-2009 --> name="ironman", version="r4-2009"
var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P<name>(?:[[:alpha:]][[:word:]]*(?:\.[[:alpha:]][[:word:]]*)*-?)+)(?:-(?P<version>(?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?))))?$`)
var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P<name>(?:[[:alpha:]][[:word:].]*(?:\.[[:alpha:]][[:word:].]*)*-?)+)(?:-(?P<version>(?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?))))?$`)
type archiveFilename struct {
raw string

View File

@ -114,12 +114,22 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) {
ty: pkg.JavaPkg,
},
{
filename: "BOOT-INF/lib/spring-data-r2dbc-1.1.0.RELEASE.jar", // Regression: https://github.com/anchore/syft/issues/255
// regression: https://github.com/anchore/syft/issues/255
filename: "BOOT-INF/lib/spring-data-r2dbc-1.1.0.RELEASE.jar",
version: "1.1.0.RELEASE",
extension: "jar",
name: "spring-data-r2dbc",
ty: pkg.JavaPkg,
},
{
// regression for artifact of the same name within jboss/keycloak:13.0.1 docker image
// which covers package name components with periods in them
filename: "jboss-saaj-api_1.4_spec-1.0.2.Final.jar",
version: "1.0.2.Final",
extension: "jar",
name: "jboss-saaj-api_1.4_spec",
ty: pkg.JavaPkg,
},
}
for _, test := range tests {

View File

@ -3,6 +3,7 @@ package java
import (
"fmt"
"io"
"path"
"strings"
"github.com/anchore/syft/internal/log"
@ -88,8 +89,8 @@ func (j *archiveParser) parse() ([]pkg.Package, error) {
return nil, fmt.Errorf("could not generate package from %s: %w", j.virtualPath, err)
}
// find aux packages from pom.properties and potentially modify the existing parentPkg
auxPkgs, err := j.discoverPkgsFromAllPomProperties(parentPkg)
// find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(parentPkg)
if err != nil {
return nil, err
}
@ -150,80 +151,42 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
}, nil
}
// discoverPkgsFromAllPomProperties parses Maven POM properties for a given
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new
// data.
func (j *archiveParser) discoverPkgsFromAllPomProperties(parentPkg *pkg.Package) ([]pkg.Package, error) {
func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([]pkg.Package, error) {
if parentPkg == nil {
return nil, nil
}
var pkgs []pkg.Package
// search and parse pom.properties files & fetch the contents
contentsOfPomPropertiesFiles, err := file.ContentsFromZip(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob)...)
properties, err := pomPropertiesByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomPropertiesGlob), j.virtualPath)
if err != nil {
return nil, fmt.Errorf("unable to extract pom.properties: %w", err)
return nil, err
}
for filePath, fileContents := range contentsOfPomPropertiesFiles {
// parse the pom properties file into a rich object
pomProperties, err := parsePomProperties(filePath, strings.NewReader(fileContents))
if err != nil {
log.Warnf("failed to parse pom.properties (%s): %+v", j.virtualPath, err)
continue
projects, err := pomProjectByParentPath(j.archivePath, j.fileManifest.GlobMatch(pomXMLGlob), j.virtualPath)
if err != nil {
return nil, err
}
for parentPath, propertiesObj := range properties {
var pomProject *pkg.PomProject
if proj, exists := projects[parentPath]; exists {
pomProject = &proj
}
if pomProperties == nil {
continue
}
if pomProperties.Version == "" || pomProperties.ArtifactID == "" {
// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
continue
}
pkgFromPom := j.newPackageFromPomProperties(*pomProperties, parentPkg)
pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.virtualPath)
if pkgFromPom != nil {
pkgs = append(pkgs, *pkgFromPom)
}
}
return pkgs, nil
}
// newPackageFromPomProperties processes a single Maven POM properties for a given parent package, returning the
// Java package it describes with the given parent package recorded in its metadata. Nil is returned when the
// described package has the same identity as the parent package, in which case updatePackage is called with
// the new package and the parent instead.
func (j *archiveParser) newPackageFromPomProperties(pomProperties pkg.PomProperties, parentPkg *pkg.Package) *pkg.Package {
// keep the artifact name within the virtual path if this package does not match the parent package
vPathSuffix := ""
if !strings.HasPrefix(pomProperties.ArtifactID, parentPkg.Name) {
vPathSuffix += ":" + pomProperties.ArtifactID
}
virtualPath := j.virtualPath + vPathSuffix
// discovered props = new package
p := pkg.Package{
Name: pomProperties.ArtifactID,
Version: pomProperties.Version,
Language: pkg.Java,
Type: pomProperties.PkgTypeIndicated(),
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath,
PomProperties: &pomProperties,
Parent: parentPkg,
},
}
// when the discovered package is the parent package itself, no new package is reported
if packageIdentitiesMatch(p, parentPkg) {
updatePackage(p, parentPkg)
return nil
}
return &p
}
// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and
// associating each discovered package to the given parent package.
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, error) {
@ -268,6 +231,95 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
return pkgs, nil
}
// pomPropertiesByParentPath extracts the given paths from the archive, parses each one as a pom.properties file, and
// returns the parsed properties keyed by the directory that contains the file. Files that fail to parse are logged
// (using virtualPath for context) and skipped, as are results that are nil or lack a version or artifact ID.
func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
	contents, err := file.ContentsFromZip(archivePath, extractPaths...)
	if err != nil {
		return nil, fmt.Errorf("unable to extract maven files: %w", err)
	}

	result := make(map[string]pkg.PomProperties)
	for entryPath, entryContents := range contents {
		props, parseErr := parsePomProperties(entryPath, strings.NewReader(entryContents))

		switch {
		case parseErr != nil:
			log.Warnf("failed to parse pom.properties virtualPath=%q path=%q: %+v", virtualPath, entryPath, parseErr)
		case props == nil:
			// the parser produced nothing for this entry
		case props.Version == "" || props.ArtifactID == "":
			// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
		default:
			result[path.Dir(entryPath)] = *props
		}
	}

	return result, nil
}
// pomProjectByParentPath extracts the given paths from the archive, parses each one as a pom.xml file, and returns
// the parsed projects keyed by the directory that contains the file. Files that fail to parse are logged (using
// virtualPath for context) and skipped, as are results that are nil or lack a version or artifact ID.
func pomProjectByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProject, error) {
	contents, err := file.ContentsFromZip(archivePath, extractPaths...)
	if err != nil {
		return nil, fmt.Errorf("unable to extract maven files: %w", err)
	}

	result := make(map[string]pkg.PomProject)
	for entryPath, entryContents := range contents {
		project, parseErr := parsePomXML(entryPath, strings.NewReader(entryContents))

		switch {
		case parseErr != nil:
			log.Warnf("failed to parse pom.xml virtualPath=%q path=%q: %+v", virtualPath, entryPath, parseErr)
		case project == nil:
			// the parser produced nothing for this entry
		case project.Version == "" || project.ArtifactID == "":
			// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
		default:
			result[path.Dir(entryPath)] = *project
		}
	}

	return result, nil
}
// newPackageFromMavenData builds a Java package from a Maven pom.properties entry and, when available, the matching
// pom.xml project (pomProject is optional and may be nil; pomProperties is required). The result is linked to the
// given parent package. nil is returned when the new package has the same identity as the parent; in that case
// updatePackage is called with the new package and the parent instead.
func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.PomProject, parentPkg *pkg.Package, virtualPath string) *pkg.Package {
	// the artifact ID is appended to the virtual path only when it is not prefixed by the parent package name
	if !strings.HasPrefix(pomProperties.ArtifactID, parentPkg.Name) {
		virtualPath += ":" + pomProperties.ArtifactID
	}

	metadata := pkg.JavaMetadata{
		VirtualPath:   virtualPath,
		PomProperties: &pomProperties,
		PomProject:    pomProject,
		Parent:        parentPkg,
	}

	// every discovered pom.properties describes a candidate package
	candidate := pkg.Package{
		Name:         pomProperties.ArtifactID,
		Version:      pomProperties.Version,
		Language:     pkg.Java,
		Type:         pomProperties.PkgTypeIndicated(),
		MetadataType: pkg.JavaMetadataType,
		Metadata:     metadata,
	}

	if !packageIdentitiesMatch(candidate, parentPkg) {
		return &candidate
	}

	// the candidate is the parent itself: hand it to updatePackage rather than reporting a second package
	updatePackage(candidate, parentPkg)
	return nil
}
func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
// the name/version pair matches...
if uniquePkgKey(&p) == uniquePkgKey(parentPkg) {

View File

@ -209,6 +209,15 @@ func TestParseJar(t *testing.T) {
Version: "2.9.2",
Extra: map[string]string{},
},
PomProject: &pkg.PomProject{
Path: "META-INF/maven/joda-time/joda-time/pom.xml",
GroupID: "joda-time",
ArtifactID: "joda-time",
Version: "2.9.2",
Name: "Joda-Time",
Description: "Date and time library to replace JDK date handling",
URL: "http://www.joda.org/joda-time/",
},
},
},
},
@ -281,7 +290,7 @@ func TestParseJar(t *testing.T) {
// write censored data back
a.Metadata = metadata
diffs := deep.Equal(a, e)
diffs := deep.Equal(e, a)
if len(diffs) > 0 {
t.Errorf("diffs found for %q", a.Name)
for _, d := range diffs {
@ -561,18 +570,19 @@ func TestParseNestedJar(t *testing.T) {
}
}
func TestPackagesFromPomProperties(t *testing.T) {
func Test_newPackageFromMavenData(t *testing.T) {
virtualPath := "given/virtual/path"
tests := []struct {
name string
props *pkg.PomProperties
props pkg.PomProperties
project *pkg.PomProject
parent *pkg.Package
expectedParent pkg.Package
expectedPackage *pkg.Package
}{
{
name: "go case: get a single package from pom properties",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
@ -626,9 +636,91 @@ func TestPackagesFromPomProperties(t *testing.T) {
},
},
},
{
name: "get a single package from pom properties + project",
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
},
project: &pkg.PomProject{
Parent: &pkg.PomParent{
GroupID: "some-parent-group-id",
ArtifactID: "some-parent-artifact-id",
Version: "1.0-parent",
},
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
Description: "desc",
URL: "aweso.me",
},
parent: &pkg.Package{
Name: "some-parent-name",
Version: "2.0",
Metadata: pkg.JavaMetadata{
VirtualPath: "some-parent-virtual-path",
Manifest: nil,
PomProperties: nil,
Parent: nil,
},
},
// note: the SAME as the original parent values
expectedParent: pkg.Package{
Name: "some-parent-name",
Version: "2.0",
Metadata: pkg.JavaMetadata{
VirtualPath: "some-parent-virtual-path",
Manifest: nil,
PomProperties: nil,
Parent: nil,
},
},
expectedPackage: &pkg.Package{
Name: "some-artifact-id",
Version: "1.0",
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath + ":" + "some-artifact-id",
PomProperties: &pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
},
PomProject: &pkg.PomProject{
Parent: &pkg.PomParent{
GroupID: "some-parent-group-id",
ArtifactID: "some-parent-artifact-id",
Version: "1.0-parent",
},
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
Description: "desc",
URL: "aweso.me",
},
Parent: &pkg.Package{
Name: "some-parent-name",
Version: "2.0",
Metadata: pkg.JavaMetadata{
VirtualPath: "some-parent-virtual-path",
Manifest: nil,
PomProperties: nil,
Parent: nil,
},
},
},
},
},
{
name: "single package from pom properties that's a Jenkins plugin",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "com.cloudbees.jenkins.plugins",
ArtifactID: "some-artifact-id",
@ -684,7 +776,7 @@ func TestPackagesFromPomProperties(t *testing.T) {
},
{
name: "child matches parent by key",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-parent-name", // note: matches parent package
@ -723,7 +815,7 @@ func TestPackagesFromPomProperties(t *testing.T) {
},
{
name: "child matches parent by key and is Jenkins plugin",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "com.cloudbees.jenkins.plugins",
ArtifactID: "some-parent-name", // note: matches parent package
@ -761,7 +853,7 @@ func TestPackagesFromPomProperties(t *testing.T) {
},
{
name: "child matches parent by virtual path -- override name and version",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-parent-name", // note: DOES NOT match parent package
@ -799,7 +891,7 @@ func TestPackagesFromPomProperties(t *testing.T) {
},
{
name: "child matches parent by artifact id",
props: &pkg.PomProperties{
props: pkg.PomProperties{
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-parent-name", // note: matches parent package
@ -840,17 +932,7 @@ func TestPackagesFromPomProperties(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// note: this zip doesn't matter, as long as it is a zip
nop, err := os.Open("test-fixtures/java-builds/packages/spring-boot-0.0.1-SNAPSHOT.jar")
assert.NoError(t, err)
// make the parser
parser, cleanup, err := newJavaArchiveParser(virtualPath, nop, false)
assert.NoError(t, err)
t.Cleanup(cleanup)
// get the test data
actualPackage := parser.newPackageFromPomProperties(*test.props, test.parent)
actualPackage := newPackageFromMavenData(test.props, test.project, test.parent, virtualPath)
assert.Equal(t, test.expectedPackage, actualPackage, "new package doesn't match")
assert.Equal(t, test.expectedParent, *test.parent, "parent doesn't match")
})

View File

@ -1,12 +1,11 @@
package java
import (
"encoding/json"
"os"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
"github.com/stretchr/testify/assert"
)
func TestParseJavaPomProperties(t *testing.T) {
@ -67,28 +66,12 @@ func TestParseJavaPomProperties(t *testing.T) {
for _, test := range tests {
t.Run(test.expected.Path, func(t *testing.T) {
fixture, err := os.Open(test.expected.Path)
if err != nil {
t.Fatalf("could not open fixture: %+v", err)
}
assert.NoError(t, err)
actual, err := parsePomProperties(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse manifest: %+v", err)
}
assert.NoError(t, err)
diffs := deep.Equal(actual, &test.expected)
if len(diffs) > 0 {
for _, d := range diffs {
t.Errorf("diff: %+v", d)
}
b, err := json.MarshalIndent(actual, "", " ")
if err != nil {
t.Fatalf("can't show results: %+v", err)
}
t.Errorf("full result: %s", string(b))
}
assert.Equal(t, &test.expected, actual)
})
}
}

View File

@ -0,0 +1,60 @@
package java
import (
"encoding/xml"
"fmt"
"io"
"strings"
"github.com/anchore/syft/syft/pkg"
"github.com/vifraa/gopom"
"golang.org/x/net/html/charset"
)
const pomXMLGlob = "*pom.xml"
// parsePomXML decodes a Maven pom.xml document from reader and returns the fields of interest as a pkg.PomProject.
// The given path is recorded verbatim on the result. An error is returned when the XML cannot be decoded.
func parsePomXML(path string, reader io.Reader) (*pkg.PomProject, error) {
	dec := xml.NewDecoder(reader)
	// without a CharsetReader the decoder complains about documents that declare a non-UTF-8 encoding, e.g.
	// "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil"
	dec.CharsetReader = charset.NewReaderLabel

	var parsed gopom.Project
	if err := dec.Decode(&parsed); err != nil {
		return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
	}

	result := pkg.PomProject{
		Path:        path,
		Parent:      pomParent(parsed.Parent),
		GroupID:     parsed.GroupID,
		ArtifactID:  parsed.ArtifactID,
		Version:     parsed.Version,
		Name:        parsed.Name,
		Description: cleanDescription(parsed.Description),
		URL:         parsed.URL,
	}
	return &result, nil
}
// pomParent converts the parent section of a parsed pom.xml into a pkg.PomParent. nil is returned when the group ID,
// artifact ID, and version are all empty; any other parent fields are ignored.
func pomParent(parent gopom.Parent) (result *pkg.PomParent) {
	if parent.ArtifactID == "" && parent.GroupID == "" && parent.Version == "" {
		return nil
	}

	return &pkg.PomParent{
		GroupID:    parent.GroupID,
		ArtifactID: parent.ArtifactID,
		Version:    parent.Version,
	}
}
// cleanDescription collapses a multi-line description into a single line: every line is trimmed of surrounding
// whitespace, blank lines are dropped, and the remaining lines are joined with a single space. Whitespace inside a
// line is left untouched. An input with no non-blank lines yields the empty string.
func cleanDescription(original string) (cleaned string) {
	lines := strings.Split(original, "\n")

	// collect the non-blank lines and join once, rather than growing a string with += and trimming the
	// trailing separator afterwards
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		if trimmed := strings.TrimSpace(line); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}

	return strings.Join(kept, " ")
}

View File

@ -0,0 +1,123 @@
package java
import (
"os"
"testing"
"github.com/vifraa/gopom"
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert"
)
// Test_parsePomXML parses pom.xml fixtures from disk and compares the result against the expected project. The
// fixture to open is taken from the expected Path, which parsePomXML echoes back on the parsed project.
func Test_parsePomXML(t *testing.T) {
	tests := []struct {
		expected pkg.PomProject
	}{
		{
			expected: pkg.PomProject{
				Path: "test-fixtures/pom/commons-codec.pom.xml",
				Parent: &pkg.PomParent{
					GroupID:    "org.apache.commons",
					ArtifactID: "commons-parent",
					Version:    "42",
				},
				GroupID:     "commons-codec",
				ArtifactID:  "commons-codec",
				Version:     "1.11",
				Name:        "Apache Commons Codec",
				Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
				URL:         "http://commons.apache.org/proper/commons-codec/",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.expected.Path, func(t *testing.T) {
			fixture, err := os.Open(test.expected.Path)
			if err != nil {
				// stop immediately: fixture is nil here and calling Name() on it below would panic
				t.Fatalf("could not open fixture: %+v", err)
			}
			// release the file handle once the subtest is done
			t.Cleanup(func() {
				assert.NoError(t, fixture.Close())
			})

			actual, err := parsePomXML(fixture.Name(), fixture)
			assert.NoError(t, err)

			assert.Equal(t, &test.expected, actual)
		})
	}
}
// Test_pomParent checks that pomParent yields a result whenever any one of the group ID, artifact ID, or version is
// set, and yields nil when none of them are set (even if other parent fields are populated).
func Test_pomParent(t *testing.T) {
	type testCase struct {
		name     string
		input    gopom.Parent
		expected *pkg.PomParent
	}

	cases := []testCase{
		{
			name:     "only group ID",
			input:    gopom.Parent{GroupID: "org.something"},
			expected: &pkg.PomParent{GroupID: "org.something"},
		},
		{
			name:     "only artifact ID",
			input:    gopom.Parent{ArtifactID: "something"},
			expected: &pkg.PomParent{ArtifactID: "something"},
		},
		{
			name:     "only Version",
			input:    gopom.Parent{Version: "something"},
			expected: &pkg.PomParent{Version: "something"},
		},
		{
			name:     "empty",
			input:    gopom.Parent{},
			expected: nil,
		},
		{
			// RelativePath is not one of the fields that pomParent inspects
			name:     "unused field",
			input:    gopom.Parent{RelativePath: "something"},
			expected: nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			actual := pomParent(tc.input)
			assert.Equal(t, tc.expected, actual)
		})
	}
}
func Test_cleanDescription(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "indent + multiline",
input: ` The Apache Commons Codec package contains simple encoder and decoders for
various formats such as Base64 and Hexadecimal. In addition to these
widely used encoders and decoders, the codec package also maintains a
collection of phonetic encoding utilities.`,
expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.expected, cleanDescription(test.input))
})
}
}

View File

@ -0,0 +1,410 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
To produce reports, use the reporting profile, for example: mvn -Preporting clean site
You may need to use the -U option to update your environment if you get an error.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.commons</groupId>
<artifactId>commons-parent</artifactId>
<version>42</version>
</parent>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<!-- Remember to update the version in default.properties as well -->
<version>1.11</version>
<name>Apache Commons Codec</name>
<inceptionYear>2002</inceptionYear>
<description>
The Apache Commons Codec package contains simple encoder and decoders for
various formats such as Base64 and Hexadecimal. In addition to these
widely used encoders and decoders, the codec package also maintains a
collection of phonetic encoding utilities.
</description>
<prerequisites>
<maven>3.0.0</maven>
</prerequisites>
<url>http://commons.apache.org/proper/commons-codec/</url>
<issueManagement>
<system>jira</system>
<url>http://issues.apache.org/jira/browse/CODEC</url>
</issueManagement>
<scm>
<connection>scm:svn:http://svn.apache.org/repos/asf/commons/proper/codec/trunk</connection>
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/commons/proper/codec/trunk</developerConnection>
<url>http://svn.apache.org/viewvc/commons/proper/codec/trunk</url>
</scm>
<distributionManagement>
<site>
<id>stagingSite</id>
<name>Apache Staging Website</name>
<url>${commons.deployment.protocol}://people.apache.org/www/commons.apache.org/${commons.componentid}/</url>
</site>
</distributionManagement>
<developers>
<developer>
<name>Henri Yandell</name>
<id>bayard</id>
<email>bayard@apache.org</email>
</developer>
<developer>
<name>Tim OBrien</name>
<id>tobrien</id>
<email>tobrien@apache.org</email>
<timezone>-6</timezone>
</developer>
<developer>
<name>Scott Sanders</name>
<id>sanders</id>
<email>sanders@totalsync.com</email>
</developer>
<developer>
<name>Rodney Waldhoff</name>
<id>rwaldhoff</id>
<email>rwaldhoff@apache.org</email>
</developer>
<developer>
<name>Daniel Rall</name>
<id>dlr</id>
<email>dlr@finemaltcoding.com</email>
</developer>
<developer>
<name>Jon S. Stevens</name>
<id>jon</id>
<email>jon@collab.net</email>
</developer>
<developer>
<name>Gary Gregory</name>
<id>ggregory</id>
<email>ggregory@apache.org</email>
<url>http://www.garygregory.com</url>
<timezone>-8</timezone>
</developer>
<developer>
<name>David Graham</name>
<id>dgraham</id>
<email>dgraham@apache.org</email>
</developer>
<developer>
<name>Julius Davies</name>
<id>julius</id>
<email>julius@apache.org</email>
<organizationUrl>http://juliusdavies.ca/</organizationUrl>
<timezone>-8</timezone>
</developer>
<developer>
<name>Thomas Neidhart</name>
<id>tn</id>
<email>tn@apache.org</email>
</developer>
</developers>
<contributors>
<contributor>
<name>Christopher O'Brien</name>
<email>siege@preoccupied.net</email>
<roles>
<role>hex</role>
<role>md5</role>
<role>architecture</role>
</roles>
</contributor>
<contributor>
<name>Martin Redington</name>
<roles>
<role>Representing xml-rpc</role>
</roles>
</contributor>
<contributor>
<name>Jeffery Dever</name>
<roles>
<role>Representing http-client</role>
</roles>
</contributor>
<contributor>
<name>Steve Zimmermann</name>
<email>steve.zimmermann@heii.com</email>
<roles>
<role>Documentation</role>
</roles>
</contributor>
<contributor>
<name>Benjamin Walstrum</name>
<email>ben@walstrum.com</email>
</contributor>
<contributor>
<name>Oleg Kalnichevski</name>
<email>oleg@ural.ru</email>
<roles>
<role>Representing http-client</role>
</roles>
</contributor>
<contributor>
<name>Dave Dribin</name>
<email>apache@dave.dribin.org</email>
<roles>
<role>DigestUtil</role>
</roles>
</contributor>
<contributor>
<name>Alex Karasulu</name>
<email>aok123 at bellsouth.net</email>
<roles>
<role>Submitted Binary class and test</role>
</roles>
</contributor>
<contributor>
<name>Matthew Inger</name>
<email>mattinger at yahoo.com</email>
<roles>
<role>Submitted DIFFERENCE algorithm for Soundex and RefinedSoundex</role>
</roles>
</contributor>
<contributor>
<name>Jochen Wiedmann</name>
<email>jochen@apache.org</email>
<roles>
<role>Base64 code [CODEC-69]</role>
</roles>
</contributor>
<contributor>
<name>Sebastian Bazley</name>
<email>sebb@apache.org</email>
<roles>
<role>Streaming Base64</role>
</roles>
</contributor>
<contributor>
<name>Matthew Pocock</name>
<email>turingatemyhamster@gmail.com</email>
<roles>
<role>Beider-Morse phonetic matching</role>
</roles>
</contributor>
<contributor>
<name>Colm Rice</name>
<email>colm_rice at hotmail dot com</email>
<roles>
<role>Submitted Match Rating Approach (MRA) phonetic encoder and tests [CODEC-161]</role>
</roles>
</contributor>
</contributors>
<!-- Codec only has test dependencies ATM -->
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.5</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>1.6</maven.compiler.source>
<maven.compiler.target>1.6</maven.compiler.target>
<commons.componentid>codec</commons.componentid>
<commons.module.name>org.apache.commons.codec</commons.module.name>
<commons.release.version>1.11</commons.release.version>
<!-- The RC version used in the staging repository URL. -->
<commons.rc.version>RC1</commons.rc.version>
<commons.jira.id>CODEC</commons.jira.id>
<commons.jira.pid>12310464</commons.jira.pid>
<!-- Ensure copies work OK (can be removed later when this is in parent POM) -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<commons.encoding>UTF-8</commons.encoding>
<checkstyle.header.file>${basedir}/LICENSE-header.txt</checkstyle.header.file>
<checkstyle.version>2.17</checkstyle.version>
<commons.clirr.version>2.8</commons.clirr.version>
</properties>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-scm-publish-plugin</artifactId>
<version>${commons.scm-publish.version}</version>
<configuration>
<ignorePathsToDelete>
<ignorePathToDelete>archive**</ignorePathToDelete>
</ignorePathsToDelete>
</configuration>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/site/resources/.htaccess</exclude>
</excludes>
</configuration>
</plugin>
<!-- Add Java 9 Automatic-Module-Name -->
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive combine.children="append">
<!-- Temporary fix, remove this after this has been implemented in parent pom -->
<manifestEntries>
<Automatic-Module-Name>${commons.module.name}</Automatic-Module-Name>
</manifestEntries>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/*AbstractTest.java</exclude>
<exclude>**/*PerformanceTest.java</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/assembly/bin.xml</descriptor>
<descriptor>src/assembly/src.xml</descriptor>
</descriptors>
<tarLongFileMode>gnu</tarLongFileMode>
</configuration>
</plugin>
<!-- Allow use of mvn checkstyle:checkstyle. Must agree with reporting section below. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>${checkstyle.version}</version>
<configuration>
<configLocation>${basedir}/checkstyle.xml</configLocation>
<enableRulesSummary>false</enableRulesSummary>
<headerFile>${basedir}/LICENSE-header.txt</headerFile>
</configuration>
</plugin>
</plugins>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<version>${checkstyle.version}</version>
<configuration>
<configLocation>${basedir}/checkstyle.xml</configLocation>
<enableRulesSummary>false</enableRulesSummary>
<headerFile>${basedir}/LICENSE-header.txt</headerFile>
</configuration>
<!-- We need to specify reportSets because 2.9.1 creates two reports -->
<reportSets>
<reportSet>
<reports>
<report>checkstyle</report>
</reports>
</reportSet>
</reportSets>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>3.8</version>
<configuration>
<targetJdk>${maven.compiler.target}</targetJdk>
<linkXref>true</linkXref>
<rulesets>
<ruleset>${basedir}/pmd.xml</ruleset>
</rulesets>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<version>${commons.findbugs.version}</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>taglist-maven-plugin</artifactId>
<version>2.4</version>
<configuration>
<tags>
<tag>TODO</tag>
<tag>NOPMD</tag>
<tag>NOTE</tag>
</tags>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>javancss-maven-plugin</artifactId>
<version>2.1</version>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/site/resources/.htaccess</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</reporting>
<profiles>
<profile>
<id>travis</id>
<activation>
<property>
<name>env.TRAVIS</name>
<value>true</value>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>cobertura-maven-plugin</artifactId>
<version>${commons.cobertura.version}</version>
<configuration>
<formats>
<format>xml</format>
</formats>
</configuration>
</plugin>
<plugin>
<groupId>org.eluder.coveralls</groupId>
<artifactId>coveralls-maven-plugin</artifactId>
<version>4.3.0</version>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -20,10 +20,11 @@ type JavaMetadata struct {
VirtualPath string `json:"virtualPath"`
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
Parent *Package `json:"-"`
}
// PomProperties represents the fields of interest extracted from a Java archive's pom.xml file.
// PomProperties represents the fields of interest extracted from a Java archive's pom.properties file.
type PomProperties struct {
Path string `mapstructure:"path" json:"path"`
Name string `mapstructure:"name" json:"name"`
@ -33,6 +34,25 @@ type PomProperties struct {
Extra map[string]string `mapstructure:",remain" json:"extraFields"`
}
// PomProject represents fields of interest extracted from a Java archive's pom.xml file. See https://maven.apache.org/ref/3.6.3/maven-model/maven.html for more details.
type PomProject struct {
	// Path is the location of the pom.xml file this project was parsed from.
	Path string `json:"path"`
	// Parent holds the coordinates from the <parent> section; it is nil (and omitted from JSON) when no parent coordinates were found.
	Parent *PomParent `json:"parent,omitempty"`
	// GroupID is the value of the project's <groupId> element.
	GroupID string `json:"groupId"`
	// ArtifactID is the value of the project's <artifactId> element.
	ArtifactID string `json:"artifactId"`
	// Version is the value of the project's <version> element.
	Version string `json:"version"`
	// Name is the value of the project's <name> element.
	Name string `json:"name"`
	// Description is the project's <description> text; omitted from JSON when empty.
	Description string `json:"description,omitempty"`
	// URL is the value of the project's <url> element; omitted from JSON when empty.
	URL string `json:"url,omitempty"`
}
// PomParent contains the fields within the <parent> tag in a pom.xml file
type PomParent struct {
	// GroupID is the value of the parent's <groupId> element.
	GroupID string `json:"groupId"`
	// ArtifactID is the value of the parent's <artifactId> element.
	ArtifactID string `json:"artifactId"`
	// Version is the value of the parent's <version> element.
	Version string `json:"version"`
}
// PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties.
func (p PomProperties) PkgTypeIndicated() Type {
if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {