improve java name and version extraction as well as parent pkg pairing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-30 08:12:25 -04:00
parent a5cba13ddf
commit 03dbfb8dfb
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
6 changed files with 107 additions and 24 deletions

View File

@ -10,11 +10,20 @@ import (
"github.com/anchore/syft/syft/pkg"
)
// versionAreaPattern matches on versions and anything after the version, through the end of the string.
// This is used to isolate the name from the version: name() deletes the matched area from the archive name.
//
// The "version" group must be preceded by "-" and consists of up to two optional dot-terminated numeric
// fields, then digits optionally prefixed by "r" and/or "c" (e.g. "2", "rc1", "r06"), then any number of
// "-"-prefixed qualifier runs made of letters, digits, "-" and ".". The "remaining" group captures
// whatever trails the version up to the end of the input.
// match examples:
// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar
// windows-remote-command-1.0.jar ---> -1.0.jar
// wstx-asl-1-2.jar ---> -1-2.jar
// guava-rc0.jar ---> -rc0.jar
var versionAreaPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P<remaining>.*)$`)
// versionPattern matches on explicit versions. This is used for extracting version information.
// match examples:
// pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1)
// pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1)
// pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3)
var versionPattern = regexp.MustCompile(`(?P<name>.+)-(?P<version>(\d+\.)?(\d+\.)?(\*|\d+)(-[a-zA-Z0-9\-\.]+)*)`)
// NOTE(review): versionPattern is declared twice in this view; presumably the declaration above is the
// line being replaced and the one below is its replacement -- confirm against the actual file.
// The declaration below has no "name" group (only "version"), so the "name=" parts of the examples
// above apply only to the first declaration. It also accepts an optional "r"/"c" prefix on the last
// numeric field (e.g. "rc1", "r06") where the first accepts "*" or digits.
var versionPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`)
type archiveFilename struct {
raw string
@ -70,14 +79,8 @@ func (a archiveFilename) version() string {
}
// name returns the package name associated with the archive filename.
//
// NOTE(review): the body shown here appears to interleave two variants of this method -- a lookup of a
// "name" entry in a.fields, and a derivation from the raw filename with the version area removed.
// As written, every statement after the first top-level return is unreachable. Confirm which variant
// is live in the actual file before relying on this behavior.
func (a archiveFilename) name() string {
// variant 1: scan the parsed field sets and use the first one that carries a "name" entry
for _, fieldSet := range a.fields {
if name, ok := fieldSet["name"]; ok {
// return the first name
return name
}
}
// derive the name from the archive name (no path or extension)
// derive the name from the archive name (no path or extension) and remove any versions found
// filepath.Base drops any leading directories so a version-like directory name cannot affect the result
basename := filepath.Base(a.raw)
return strings.TrimSuffix(basename, filepath.Ext(basename))
// variant 2: strip the extension, then delete the version area (the version and everything after it)
cleaned := strings.TrimSuffix(basename, filepath.Ext(basename))
return versionAreaPattern.ReplaceAllString(cleaned, "")
}

View File

@ -1,9 +1,10 @@
package java
import (
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/sergi/go-diff/diffmatchpatch"
"testing"
)
func TestExtractInfoFromJavaArchiveFilename(t *testing.T) {
@ -56,12 +57,78 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) {
name: "pkg-extra-field-maven",
ty: pkg.JenkinsPluginPkg,
},
{
filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3a-hudson.jar",
version: "1.0.2-beta-2.2.3a-hudson",
extension: "jar",
name: "wagon-webdav",
ty: pkg.JavaPkg,
},
{
filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3-hudson.jar",
version: "1.0.2-beta-2.2.3-hudson",
extension: "jar",
name: "wagon-webdav",
ty: pkg.JavaPkg,
},
{
filename: "/some/path-with-version-5.4.3/windows-remote-command-1.0.jar",
version: "1.0",
extension: "jar",
name: "windows-remote-command",
ty: pkg.JavaPkg,
},
{
filename: "/some/path-with-version-5.4.3/wagon-http-lightweight-1.0.5-beta-2.jar",
version: "1.0.5-beta-2",
extension: "jar",
name: "wagon-http-lightweight",
ty: pkg.JavaPkg,
},
{
filename: "/hudson.war:WEB-INF/lib/commons-jelly-1.1-hudson-20100305.jar",
version: "1.1-hudson-20100305",
extension: "jar",
name: "commons-jelly",
ty: pkg.JavaPkg,
},
{
filename: "/hudson.war:WEB-INF/lib/jtidy-4aug2000r7-dev-hudson-1.jar",
// I don't see how we can reliably account for this case
//version: "4aug2000r7-dev-hudson-1",
version: "",
extension: "jar",
name: "jtidy",
ty: pkg.JavaPkg,
},
{
filename: "/hudson.war:WEB-INF/lib/trilead-ssh2-build212-hudson-5.jar",
// I don't see how we can reliably account for this case
//version: "build212-hudson-5",
version: "5",
extension: "jar",
// name: "trilead-ssh2",
name: "trilead-ssh2-build212-hudson",
ty: pkg.JavaPkg,
},
{
filename: "/hudson.war:WEB-INF/lib/guava-r06.jar",
version: "r06",
extension: "jar",
name: "guava",
ty: pkg.JavaPkg,
},
}
for _, test := range tests {
t.Run(test.filename, func(t *testing.T) {
obj := newJavaArchiveFilename(test.filename)
ty := obj.pkgType()
if ty != test.ty {
t.Errorf("mismatched type: %+v != %v", ty, test.ty)
}
version := obj.version()
if version != test.version {
dmp := diffmatchpatch.New()

View File

@ -66,13 +66,17 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
}
// fetch the last element of the virtual path
virtualElements := strings.Split(virtualPath, ":")
currentFilepath := virtualElements[len(virtualElements)-1]
return &archiveParser{
discoveredPkgs: internal.NewStringSet(),
fileManifest: fileManifest,
virtualPath: virtualPath,
archivePath: archivePath,
contentPath: contentPath,
fileInfo: newJavaArchiveFilename(virtualPath),
fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested,
}, cleanupFn, nil
}
@ -182,6 +186,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([
if !strings.HasPrefix(propsObj.ArtifactID, parentPkg.Name) {
vPathSuffix += ":" + propsObj.ArtifactID
}
virtualPath := j.virtualPath + vPathSuffix
// discovered props = new package
p := pkg.Package{
@ -191,7 +196,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: j.virtualPath + vPathSuffix,
VirtualPath: virtualPath,
PomProperties: propsObj,
Parent: parentPkg,
},
@ -199,16 +204,24 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([
pkgKey := uniquePkgKey(&p)
if !j.discoveredPkgs.Contains(pkgKey) {
// only keep packages we haven't seen yet
pkgs = append(pkgs, p)
} else if pkgKey == parentKey {
if pkgKey == parentKey || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath || len(contents) == 1 {
// we've run across more information about our parent package, add this info to the parent package metadata
// the pom properties is typically a better source of information for name and version than the manifest
if p.Name != parentPkg.Name {
parentPkg.Name = p.Name
}
if p.Version != parentPkg.Version {
parentPkg.Version = p.Version
}
parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata)
if ok {
parentMetadata.PomProperties = propsObj
parentPkg.Metadata = parentMetadata
}
} else if !j.discoveredPkgs.Contains(pkgKey) {
// only keep packages we haven't seen yet (and are not related to the parent package)
pkgs = append(pkgs, p)
}
}
}

View File

@ -70,7 +70,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error)
if len(sections) > 0 {
manifest.Main = sections[0]
if len(sections) > 1 {
manifest.Sections = make(map[string]map[string]string)
manifest.NamedSections = make(map[string]map[string]string)
for i, s := range sections[1:] {
name, ok := s["Name"]
if !ok {
@ -82,7 +82,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error)
} else {
delete(s, "Name")
}
manifest.Sections[name] = s
manifest.NamedSections[name] = s
}
}
}
@ -117,10 +117,10 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string
func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
var version string
switch {
case manifest.Main["Implementation-Version"] != "":
version = manifest.Main["Implementation-Version"]
case filenameObj.version() != "":
version = filenameObj.version()
case manifest.Main["Implementation-Version"] != "":
version = manifest.Main["Implementation-Version"]
case manifest.Main["Specification-Version"] != "":
version = manifest.Main["Specification-Version"]
case manifest.Main["Plugin-Version"] != "":

View File

@ -45,7 +45,7 @@ func TestParseJavaManifest(t *testing.T) {
"Archiver-Version": "Plexus Archiver",
"Created-By": "Apache Maven 3.6.3",
},
Sections: map[string]map[string]string{
NamedSections: map[string]map[string]string{
"thing-1": {
"Built-By": "?",
},

View File

@ -22,8 +22,8 @@ type PomProperties struct {
// JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file.
//
// NOTE(review): Main is listed twice and both Sections and NamedSections appear below; presumably this
// view shows a rename of Sections to NamedSections with old and new lines together -- confirm against
// the actual file.
type JavaManifest struct {
Main map[string]string `json:"main,omitempty"`
Sections map[string]map[string]string `json:"sections,omitempty"`
// Main holds the key/value pairs of the first section of the manifest.
Main map[string]string `json:"main,omitempty"`
// NamedSections holds the sections after the first, keyed by the name parseJavaManifest assigns to each.
NamedSections map[string]map[string]string `json:"namedSections,omitempty"`
}
func (m JavaMetadata) PackageURL() string {