Add pURL generation for java packages + fix NPM pURL generation (#812)

* enhance pURL generation for java packages

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* optionally split out npm namespaces for pURL generation

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* nit updates

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-02-10 13:46:38 -05:00 committed by GitHub
parent 5ab872c732
commit ca032434b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 176 additions and 98 deletions

View File

@ -40,11 +40,11 @@ var (
) )
func candidateProductsForJava(p pkg.Package) []string { func candidateProductsForJava(p pkg.Package) []string {
return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), groupIDsFromJavaPackage(p)) return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
} }
func candidateVendorsForJava(p pkg.Package) fieldCandidateSet { func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
gidVendors := vendorsFromGroupIDs(groupIDsFromJavaPackage(p)) gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
nameVendors := vendorsFromJavaManifestNames(p) nameVendors := vendorsFromJavaManifestNames(p)
return newFieldCandidateSetFromSets(gidVendors, nameVendors) return newFieldCandidateSetFromSets(gidVendors, nameVendors)
} }
@ -173,7 +173,7 @@ func artifactIDFromJavaPackage(p pkg.Package) string {
return artifactID return artifactID
} }
func groupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) { func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
metadata, ok := p.Metadata.(pkg.JavaMetadata) metadata, ok := p.Metadata.(pkg.JavaMetadata)
if !ok { if !ok {
return nil return nil

View File

@ -333,7 +333,7 @@ func Test_groupIDsFromJavaPackage(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
assert.ElementsMatch(t, test.expects, groupIDsFromJavaPackage(test.pkg)) assert.ElementsMatch(t, test.expects, GroupIDsFromJavaPackage(test.pkg))
}) })
} }
} }

View File

@ -55,28 +55,13 @@ type archiveFilename struct {
version string version string
} }
// subexpIndex reports the position of the first capture group in re whose
// name equals the given name, or -1 when no capture group carries that name.
//
// TODO: Go 1.15 adds a `SubexpIndex` method to the Regexp type that covers
// this use case more elegantly; drop this helper in favor of that method once
// the project builds with Go 1.15+.
func subexpIndex(re *regexp.Regexp, name string) int {
	groupNames := re.SubexpNames()
	for idx := 0; idx < len(groupNames); idx++ {
		if groupNames[idx] == name {
			return idx
		}
	}
	return -1
}
func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string { func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string {
if len(matches) < 1 { if len(matches) < 1 {
log.Warnf("unexpectedly empty matches for archive '%s'", raw) log.Warnf("unexpectedly empty matches for archive '%s'", raw)
return "" return ""
} }
index := subexpIndex(re, subexpName) index := re.SubexpIndex(subexpName)
if index < 1 { if index < 1 {
log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw) log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw)
return "" return ""

View File

@ -122,6 +122,13 @@ func (j *archiveParser) parse() ([]*pkg.Package, []artifact.Relationship, error)
pkgs = append([]*pkg.Package{parentPkg}, pkgs...) pkgs = append([]*pkg.Package{parentPkg}, pkgs...)
} }
// add pURLs to all packages found
// note: since package information may change after initial creation when parsing multiple locations within the
// jar, we wait until the conclusion of the parsing process before synthesizing pURLs.
for _, p := range pkgs {
addPURL(p)
}
return pkgs, relationships, nil return pkgs, relationships, nil
} }
@ -348,7 +355,7 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po
} }
if packageIdentitiesMatch(p, parentPkg) { if packageIdentitiesMatch(p, parentPkg) {
updatePackage(p, parentPkg) updateParentPackage(p, parentPkg)
return nil return nil
} }
@ -379,7 +386,7 @@ func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool {
return false return false
} }
func updatePackage(p pkg.Package, parentPkg *pkg.Package) { func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) {
// we've run across more information about our parent package, add this info to the parent package metadata // we've run across more information about our parent package, add this info to the parent package metadata
// the pom properties is typically a better source of information for name and version than the manifest // the pom properties is typically a better source of information for name and version than the manifest
parentPkg.Name = p.Name parentPkg.Name = p.Name
@ -401,3 +408,17 @@ func updatePackage(p pkg.Package, parentPkg *pkg.Package) {
parentPkg.Metadata = parentMetadata parentPkg.Metadata = parentMetadata
} }
} }
// addPURL synthesizes a package URL for the given package and stores it on the
// package's java metadata. The package is left untouched when no pURL could be
// generated or when its metadata is not pkg.JavaMetadata.
func addPURL(p *pkg.Package) {
	generated := packageURL(*p)
	if generated == "" {
		return
	}

	// the metadata is held by value, so the updated copy must be assigned back onto the package
	if javaMetadata, isJava := p.Metadata.(pkg.JavaMetadata); isJava {
		javaMetadata.PURL = generated
		p.Metadata = javaMetadata
	}
}

View File

@ -134,6 +134,7 @@ func TestParseJar(t *testing.T) {
Version: "1.0-SNAPSHOT", Version: "1.0-SNAPSHOT",
Extra: map[string]string{}, Extra: map[string]string{},
}, },
PURL: "pkg:maven/io.jenkins.plugins/example-jenkins-plugin@1.0-SNAPSHOT",
}, },
}, },
}, },
@ -154,6 +155,7 @@ func TestParseJar(t *testing.T) {
"Manifest-Version": "1.0", "Manifest-Version": "1.0",
}, },
}, },
PURL: "pkg:maven/example-java-app-gradle/example-java-app-gradle@0.1.0",
}, },
}, },
}, },
@ -191,6 +193,7 @@ func TestParseJar(t *testing.T) {
Version: "0.1.0", Version: "0.1.0",
Extra: map[string]string{}, Extra: map[string]string{},
}, },
PURL: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
}, },
}, },
"joda-time": { "joda-time": {
@ -219,6 +222,7 @@ func TestParseJar(t *testing.T) {
Description: "Date and time library to replace JDK date handling", Description: "Date and time library to replace JDK date handling",
URL: "http://www.joda.org/joda-time/", URL: "http://www.joda.org/joda-time/",
}, },
PURL: "pkg:maven/joda-time/joda-time@2.9.2",
}, },
}, },
}, },

View File

@ -0,0 +1,25 @@
package java
import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
// packageURL returns the PURL for the specific java package (see https://github.com/package-url/purl-spec).
// The namespace is the first group ID reported by cpe.GroupIDsFromJavaPackage; when none is reported the
// package name is used as the namespace instead.
func packageURL(p pkg.Package) string {
	namespace := p.Name
	if candidates := cpe.GroupIDsFromJavaPackage(p); len(candidates) > 0 {
		namespace = candidates[0]
	}

	return packageurl.NewPackageURL(
		packageurl.TypeMaven, // TODO: should we filter down by package types here?
		namespace,
		p.Name,
		p.Version,
		nil, // TODO: there are probably several qualifiers that can be specified here
		"",
	).ToString()
}

View File

@ -0,0 +1,45 @@
package java
import (
"github.com/anchore/syft/syft/pkg"
"github.com/stretchr/testify/assert"
"testing"
)
// Test_packageURL verifies that a java package carrying pom properties is
// rendered as a maven pURL built from its group ID, name, and version.
func Test_packageURL(t *testing.T) {
	mavenAppMetadata := pkg.JavaMetadata{
		VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar",
		Manifest: &pkg.JavaManifest{
			Main: map[string]string{
				"Manifest-Version": "1.0",
			},
		},
		PomProperties: &pkg.PomProperties{
			Path:       "META-INF/maven/org.anchore/example-java-app-maven/pom.properties",
			GroupID:    "org.anchore",
			ArtifactID: "example-java-app-maven",
			Version:    "0.1.0",
			Extra:      map[string]string{},
		},
	}

	cases := []struct {
		pkg    pkg.Package
		expect string
	}{
		{
			expect: "pkg:maven/org.anchore/example-java-app-maven@0.1.0",
			pkg: pkg.Package{
				Name:         "example-java-app-maven",
				Version:      "0.1.0",
				Language:     pkg.Java,
				Type:         pkg.JavaPkg,
				MetadataType: pkg.JavaMetadataType,
				Metadata:     mavenAppMetadata,
			},
		},
	}

	for _, tc := range cases {
		// the expected pURL doubles as the subtest name
		t.Run(tc.expect, func(t *testing.T) {
			actual := packageURL(tc.pkg)
			assert.Equal(t, tc.expect, actual)
		})
	}
}

View File

@ -5,13 +5,12 @@ import (
"github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/linux"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
) )
var _ urlIdentifier = (*JavaMetadata)(nil) var _ urlIdentifier = (*JavaMetadata)(nil)
var JenkinsPluginPomPropertiesGroupIDs = []string{ var jenkinsPluginPomPropertiesGroupIDs = []string{
"io.jenkins.plugins", "io.jenkins.plugins",
"org.jenkins.plugins", "org.jenkins.plugins",
"org.jenkins-ci.plugins", "org.jenkins-ci.plugins",
@ -25,6 +24,7 @@ type JavaMetadata struct {
Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"` Manifest *JavaManifest `mapstructure:"Manifest" json:"manifest,omitempty"`
PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"` PomProperties *PomProperties `mapstructure:"PomProperties" json:"pomProperties,omitempty" cyclonedx:"-"`
PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"` PomProject *PomProject `mapstructure:"PomProject" json:"pomProject,omitempty"`
PURL string `hash:"ignore" json:"-"` // pURLs and CPEs are ignored for package IDs
Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy). Parent *Package `hash:"ignore" json:"-"` // note: the parent cannot be included in the minimal definition of uniqueness since this field is not reproducible in an encode-decode cycle (is lossy).
} }
@ -59,7 +59,7 @@ type PomParent struct {
// PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties. // PkgTypeIndicated returns the package Type indicated by the data contained in the PomProperties.
func (p PomProperties) PkgTypeIndicated() Type { func (p PomProperties) PkgTypeIndicated() Type {
if internal.HasAnyOfPrefixes(p.GroupID, JenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") { if internal.HasAnyOfPrefixes(p.GroupID, jenkinsPluginPomPropertiesGroupIDs...) || strings.Contains(p.GroupID, ".jenkins.plugin") {
return JenkinsPluginPkg return JenkinsPluginPkg
} }
@ -74,18 +74,5 @@ type JavaManifest struct {
// PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec) // PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec)
func (m JavaMetadata) PackageURL(_ *linux.Release) string { func (m JavaMetadata) PackageURL(_ *linux.Release) string {
if m.PomProperties != nil { return m.PURL
pURL := packageurl.NewPackageURL(
packageurl.TypeMaven,
m.PomProperties.GroupID,
m.PomProperties.ArtifactID,
m.PomProperties.Version,
nil, // TODO: there are probably several qualifiers that can be specified here
"")
return pURL.ToString()
}
// TODO: support non-maven artifacts
return ""
} }

View File

@ -3,7 +3,6 @@ package pkg
import ( import (
"testing" "testing"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -110,38 +109,3 @@ func TestPomProperties_PkgTypeIndicated(t *testing.T) {
}) })
} }
} }
// TestJavaMetadata_pURL checks the pURL produced by JavaMetadata.PackageURL
// both when pom properties are present and when the metadata is empty.
func TestJavaMetadata_pURL(t *testing.T) {
	cases := []struct {
		metadata JavaMetadata
		expected string
	}{
		{
			expected: "pkg:maven/g.id/a@v",
			metadata: JavaMetadata{
				PomProperties: &PomProperties{
					Path:       "p",
					Name:       "n",
					GroupID:    "g.id",
					ArtifactID: "a",
					Version:    "v",
				},
			},
		},
		{
			expected: "",
			metadata: JavaMetadata{},
		},
	}

	for _, tc := range cases {
		// the expected pURL doubles as the subtest name
		t.Run(tc.expected, func(t *testing.T) {
			got := tc.metadata.PackageURL(nil)
			if got == tc.expected {
				return
			}
			// render a readable diff of expected vs. actual on mismatch
			differ := diffmatchpatch.New()
			delta := differ.DiffMain(tc.expected, got, true)
			t.Errorf("diff: %s", differ.DiffPrettyText(delta))
		})
	}
}

View File

@ -1,6 +1,10 @@
package pkg package pkg
import "github.com/anchore/packageurl-go" import (
"strings"
"github.com/anchore/packageurl-go"
)
// Language represents a single programming language. // Language represents a single programming language.
type Language string type Language string
@ -43,16 +47,16 @@ func LanguageFromPURL(p string) Language {
} }
func LanguageByName(name string) Language { func LanguageByName(name string) Language {
switch name { switch strings.ToLower(name) {
case packageurl.TypeMaven, purlGradlePkgType: case packageurl.TypeMaven, string(purlGradlePkgType), string(JavaPkg), string(Java):
return Java return Java
case packageurl.TypeComposer: case packageurl.TypeComposer, string(PhpComposerPkg), string(PHP):
return PHP return PHP
case packageurl.TypeGolang: case packageurl.TypeGolang, string(GoModulePkg), string(Go):
return Go return Go
case packageurl.TypeNPM: case packageurl.TypeNPM, string(JavaScript):
return JavaScript return JavaScript
case packageurl.TypePyPi: case packageurl.TypePyPi, string(Python):
return Python return Python
case packageurl.TypeGem: case packageurl.TypeGem:
return Ruby return Ruby

View File

@ -1,6 +1,8 @@
package pkg package pkg
import ( import (
"strings"
"github.com/anchore/packageurl-go" "github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/linux"
) )
@ -21,10 +23,19 @@ type NpmPackageJSONMetadata struct {
// PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec) // PackageURL returns the PURL for the specific NPM package (see https://github.com/package-url/purl-spec)
func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string { func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string {
var namespace string
name := p.Name
fields := strings.SplitN(p.Name, "/", 2)
if len(fields) > 1 {
namespace = fields[0]
name = fields[1]
}
return packageurl.NewPackageURL( return packageurl.NewPackageURL(
packageurl.TypeNPM, packageurl.TypeNPM,
"", namespace,
p.Name, name,
p.Version, p.Version,
nil, nil,
"", "",

View File

@ -0,0 +1,42 @@
package pkg
import (
"github.com/anchore/packageurl-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
)
// TestNpmPackageJSONMetadata_PackageURL checks the pURL generated for npm
// packages with and without a namespace in the name, and confirms that every
// generated pURL can be parsed back by the packageurl library.
func TestNpmPackageJSONMetadata_PackageURL(t *testing.T) {
	cases := []struct {
		name     string
		metadata NpmPackageJSONMetadata
		expected string
	}{
		{
			name:     "no namespace",
			metadata: NpmPackageJSONMetadata{Name: "arborist", Version: "2.6.2"},
			expected: "pkg:npm/arborist@2.6.2",
		},
		{
			name:     "split by namespace",
			metadata: NpmPackageJSONMetadata{Name: "@npmcli/arborist", Version: "2.6.2"},
			expected: "pkg:npm/@npmcli/arborist@2.6.2",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.metadata.PackageURL(nil)
			assert.Equal(t, tc.expected, got)

			// the generated value must also round-trip through the pURL parser
			_, parseErr := packageurl.FromString(got)
			require.NoError(t, parseErr)
		})
	}
}

View File

@ -141,13 +141,8 @@ func TestPackageURL(t *testing.T) {
Version: "bad-v0.1.0", Version: "bad-v0.1.0",
Type: JavaPkg, Type: JavaPkg,
Metadata: JavaMetadata{ Metadata: JavaMetadata{
PomProperties: &PomProperties{ PomProperties: &PomProperties{},
Path: "p", PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger
Name: "n",
GroupID: "g.id",
ArtifactID: "a",
Version: "v",
},
}, },
}, },
@ -160,13 +155,8 @@ func TestPackageURL(t *testing.T) {
Version: "bad-v0.1.0", Version: "bad-v0.1.0",
Type: JenkinsPluginPkg, Type: JenkinsPluginPkg,
Metadata: JavaMetadata{ Metadata: JavaMetadata{
PomProperties: &PomProperties{ PomProperties: &PomProperties{},
Path: "p", PURL: "pkg:maven/g.id/a@v", // assembled by the java cataloger
Name: "n",
GroupID: "g.id",
ArtifactID: "a",
Version: "v",
},
}, },
}, },