Add pURL generation for java packages + fix NPM pURL generation (#812)

* enhance pURL generation for java packages

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* optionally split out npm namespaces for pURL generation

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* nit updates

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-02-10 13:46:38 -05:00 committed by GitHub
parent 5ab872c732
commit ca032434b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 176 additions and 98 deletions

View File

@ -40,11 +40,11 @@ var (
)
func candidateProductsForJava(p pkg.Package) []string {
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), groupIDsFromJavaPackage(p))
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
}
func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
gidVendors := vendorsFromGroupIDs(groupIDsFromJavaPackage(p))
gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
nameVendors := vendorsFromJavaManifestNames(p)
return newFieldCandidateSetFromSets(gidVendors, nameVendors)
}
@ -173,7 +173,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string {
return artifactID
}
func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
metadata, ok := p.Metadata.(pkg.JavaMetadata)
if !ok {
return nil

View File

@ -333,7 +333,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expects, groupIDsFromJavaPackage(test.pkg))
assert.ElementsMatch(t, test.expects, GroupIDsFromJavaPackage(test.pkg))
})
}
}

View File

@ -55,28 +55,13 @@ type archiveFilename struct {
version string
}
// subexpIndex returns the index of the first capture group in re that is named
// name, or -1 when re has no capture group with that name.
//
// It delegates to (*regexp.Regexp).SubexpIndex (available since Go 1.15), which
// replaces the manual scan over re.SubexpNames() that older toolchains required.
// Note: an empty name never identifies a capture group and therefore yields -1.
func subexpIndex(re *regexp.Regexp, name string) int {
	return re.SubexpIndex(name)
}
func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string {
if len(matches) < 1 {
log.Warnf("unexpectedly empty matches for archive '%s'", raw)
return ""
}
index := subexpIndex(re, subexpName)
index := re.SubexpIndex(subexpName)
if index < 1 {
log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw)
return ""

View File

@ -122,6 +122,13 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
pkgs = append([]*pkg.Package{parentPkg}, pkgs...)
}
// add pURLs to all packages found
// note: since package information may change after initial creation when parsing multiple locations within the
// jar, we wait until the conclusion of the parsing process before synthesizing pURLs.
for _, p := range pkgs {
addPURL(p)
}
return pkgs, relationships, nil
}
@ -348,7 +355,7 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po
}
if packageIdentitiesMatch(p, parentPkg) {
updatePackage(p, parentPkg)
updateParentPackage(p, parentPkg)
return nil
}
@ -379,7 +386,7 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
return false
}
func updatePackage(p pkg.Package, parentPkg *pkg.Package) {
func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) {
// we've run across more information about our parent package, add this info to the parent package metadata
// the pom properties is typically a better source of information for name and version than the manifest
parentPkg.Name = p.Name
@ -401,3 +408,17 @@ func updatePackage(p pkg.Package, parentPkg *pkg.Package) {
parentPkg.Metadata = parentMetadata
}
}
// addPURL derives a package URL for p via packageURL and stores it in the PURL field of the
// package's pkg.JavaMetadata. The package is left unmodified when the derived pURL is empty or
// when p.Metadata does not hold a pkg.JavaMetadata value.
func addPURL(p *pkg.Package) {
	derived := packageURL(*p)
	javaMetadata, isJavaMetadata := p.Metadata.(pkg.JavaMetadata)
	if derived == "" || !isJavaMetadata {
		return
	}

	// the metadata is asserted as a value (not a pointer), so the updated copy must be written back
	javaMetadata.PURL = derived
	p.Metadata = javaMetadata
}

View File

@ -134,6 +134,7 @@ func TestParseJar(t *testing.T) {
Version: "1.0-SNAPSHOT",
Extra: map[string]string{},
},
PURL: "pkg:maven/io.jenkins.plugins/example-jenkins-plugin@1.0-SNAPSHOT",
},
},
},
@ -154,6 +155,7 @@ func TestParseJar(t *testing.T) {
"Manifest-Version": "1.0",
},
},
PURL: "pkg:maven/example-java-app-gradle/example-java-app-gradle@0.1.0",
},
},
},
@ -191,6 +193,7 @@ func TestParseJar(t *testing.T) {
Version: "0.1.0",
Extra: map[string]string{},
},
PURL: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
},
},
"joda-time": {
@ -219,6 +222,7 @@ func TestParseJar(t *testing.T) {
Description: "Date and time library to replace JDK date handling",
URL: "http://www.joda.org/joda-time/",
},
PURL: "pkg:maven/joda-time/joda-time@2.9.2",
},
},
},

View File

@ -0,0 +1,25 @@
package java
import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
// packageURL returns the pURL for the given java package (see https://github.com/package-url/purl-spec).
// The pURL namespace is the first group ID reported by cpe.GroupIDsFromJavaPackage; when no group ID
// is reported, the package name is used as the namespace instead.
func packageURL(p pkg.Package) string {
	namespace := p.Name
	if candidates := cpe.GroupIDsFromJavaPackage(p); len(candidates) > 0 {
		namespace = candidates[0]
	}

	return packageurl.NewPackageURL(
		packageurl.TypeMaven, // TODO: should we filter down by package types here?
		namespace,
		p.Name,
		p.Version,
		nil, // TODO: there are probably several qualifiers that can be specified here
		"",
	).ToString()
}

View File

@ -0,0 +1,45 @@
package java
import (
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert"
"testing"
)
// Test_packageURL verifies that packageURL builds the expected maven pURL for a java package
// whose pom properties carry a group ID.
func Test_packageURL(t *testing.T) {
	type testCase struct {
		pkg    pkg.Package
		expect string
	}

	cases := []testCase{
		{
			expect: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
			pkg: pkg.Package{
				Name:         "example-java-app-maven",
				Version:      "0.1.0",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata: pkg.JavaMetadata{
					VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar",
					Manifest: &pkg.JavaManifest{
						Main: map[string]string{
							"Manifest-Version": "1.0",
						},
					},
					PomProperties: &pkg.PomProperties{
						Path:       "META-INF/maven/org.anchore/example-java-app-maven/pom.properties",
						GroupID:    "org.anchore",
						ArtifactID: "example-java-app-maven",
						Version:    "0.1.0",
						Extra:      map[string]string{},
					},
				},
			},
		},
	}

	for _, tc := range cases {
		// each case is named after the pURL it expects
		t.Run(tc.expect, func(t *testing.T) {
			actual := packageURL(tc.pkg)
			assert.Equal(t, tc.expect, actual)
		})
	}
}

View File

@ -5,13 +5,12 @@ import (
"github.com/anchore/syft/syft/linux"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal"
)
var _ urlIdentifier = (*JavaMetadata)(nil)
var JenkinsPluginPomPropertiesGroupIDs = []string{
var jenkinsPluginPomPropertiesGroupIDs = []string{
"io.jenkins.plugins",
"org.jenkins.plugins",
"org.jenkins-ci.plugins",
@ -25,6 +24,7 @@ type JavaMetadata struct {
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
}
@ -59,7 +59,7 @@ type PomParent struct {
// PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties.
func (p PomProperties) PkgTypeIndicated() Type {
if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {
if internal.HasAnyOfPrefixes(p.GroupID, jenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {
return JenkinsPluginPkg
}
@ -74,18 +74,5 @@ type JavaManifest struct {
// PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec)
func (m JavaMetadata) PackageURL(_ *linux.Release) string {
if m.PomProperties != nil {
pURL := packageurl.NewPackageURL(
packageurl.TypeMaven,
m.PomProperties.GroupID,
m.PomProperties.ArtifactID,
m.PomProperties.Version,
nil, // TODO: there are probably several qualifiers that can be specified here
"")
return pURL.ToString()
}
// TODO: support non-maven artifacts
return ""
return m.PURL
}

View File

@ -3,7 +3,6 @@ package pkg
import (
"testing"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
)
@ -110,38 +109,3 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) {
})
}
}
// TestJavaMetadata_pURL checks the pURL reported by JavaMetadata.PackageURL for metadata with
// populated pom properties and for empty metadata.
func TestJavaMetadata_pURL(t *testing.T) {
	cases := []struct {
		metadata JavaMetadata
		expected string
	}{
		{
			expected: "pkg:maven/g.id/a@v",
			metadata: JavaMetadata{
				PomProperties: &PomProperties{
					Path:       "p",
					Name:       "n",
					GroupID:    "g.id",
					ArtifactID: "a",
					Version:    "v",
				},
			},
		},
		{
			expected: "",
			metadata: JavaMetadata{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.expected, func(t *testing.T) {
			got := tc.metadata.PackageURL(nil)
			if got == tc.expected {
				return
			}

			// report the mismatch as a readable diff between the expected and actual pURL
			differ := diffmatchpatch.New()
			delta := differ.DiffMain(tc.expected, got, true)
			t.Errorf("diff: %s", differ.DiffPrettyText(delta))
		})
	}
}

View File

@ -1,6 +1,10 @@
package pkg
import "github.com/anchore/packageurl-go"
import (
"strings"
"github.com/anchore/packageurl-go"
)
// Language represents a single programming language.
type Language string
@ -43,16 +47,16 @@ func LanguageFromPURL(p string) Language {
}
func LanguageByName(name string) Language {
switch name {
case packageurl.TypeMaven, purlGradlePkgType:
switch strings.ToLower(name) {
case packageurl.TypeMaven, string(purlGradlePkgType), string(JavaPkg), string(Java):
return Java
case packageurl.TypeComposer:
case packageurl.TypeComposer, string(PhpComposerPkg), string(PHP):
return PHP
case packageurl.TypeGolang:
case packageurl.TypeGolang, string(GoModulePkg), string(Go):
return Go
case packageurl.TypeNPM:
case packageurl.TypeNPM, string(JavaScript):
return JavaScript
case packageurl.TypePyPi:
case packageurl.TypePyPi, string(Python):
return Python
case packageurl.TypeGem:
return Ruby

View File

@ -1,6 +1,8 @@
package pkg
import (
"strings"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)
@ -21,10 +23,19 @@ type NpmPackageJSONMetadata struct {
// PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec)
func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string {
var namespace string
name := p.Name
fields := strings.SplitN(p.Name, "/", 2)
if len(fields) > 1 {
namespace = fields[0]
name = fields[1]
}
return packageurl.NewPackageURL(
packageurl.TypeNPM,
"",
p.Name,
namespace,
name,
p.Version,
nil,
"",

View File

@ -0,0 +1,42 @@
package pkg
import (
"github.com/anchore/packageurl-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
)
// TestNpmPackageJSONMetadata_PackageURL checks the pURL produced for npm packages with and without
// a "/"-separated namespace in the package name, and that each produced pURL can be parsed back.
func TestNpmPackageJSONMetadata_PackageURL(t *testing.T) {
	type testCase struct {
		name     string
		metadata NpmPackageJSONMetadata
		expected string
	}

	cases := []testCase{
		{
			name:     "no namespace",
			metadata: NpmPackageJSONMetadata{Name: "arborist", Version: "2.6.2"},
			expected: "pkg:npm/arborist@2.6.2",
		},
		{
			name:     "split by namespace",
			metadata: NpmPackageJSONMetadata{Name: "@npmcli/arborist", Version: "2.6.2"},
			expected: "pkg:npm/@npmcli/arborist@2.6.2",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.metadata.PackageURL(nil)
			assert.Equal(t, tc.expected, got)

			// the generated pURL must also be accepted by the pURL parser
			_, parseErr := packageurl.FromString(got)
			require.NoError(t, parseErr)
		})
	}
}

View File

@ -141,13 +141,8 @@ func TestPackageURL(t *testing.T) {
Version: "bad-v0.1.0",
Type: JavaPkg,
Metadata: JavaMetadata{
PomProperties: &PomProperties{
Path: "p",
Name: "n",
GroupID: "g.id",
ArtifactID: "a",
Version: "v",
},
PomProperties: &PomProperties{},
PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger
},
},
@ -160,13 +155,8 @@ func TestPackageURL(t *testing.T) {
Version: "bad-v0.1.0",
Type: JenkinsPluginPkg,
Metadata: JavaMetadata{
PomProperties: &PomProperties{
Path: "p",
Name: "n",
GroupID: "g.id",
ArtifactID: "a",
Version: "v",
},
PomProperties: &PomProperties{},
PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger
},
},