Rework Java archive name and version detection and clean up tests

Signed-off-by: Dan Luhring <dan.luhring@anchore.com>
This commit is contained in:
Dan Luhring 2020-11-14 20:30:51 -05:00
parent 5afdd574a8
commit 3e8bca6911
No known key found for this signature in database
GPG Key ID: 9CEE23D079426CEF
4 changed files with 128 additions and 95 deletions

View File

@ -10,45 +10,100 @@ import (
"github.com/anchore/syft/syft/pkg"
)
// match on versions and anything after the version. This is used to isolate the name from the version.
// match examples:
// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar
// windows-remote-command-1.0.jar ---> -1.0.jar
// wstx-asl-1-2.jar ---> -1-2.jar
// guava-rc0.jar ---> -rc0.jar
var versionAreaPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P<remaining>.*)$`)
// match on explicit versions. This is used for extracting version information.
// match examples:
// pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1)
// pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1)
// pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3)
var versionPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`)
// nameAndVersionPattern finds the package name and version (as named capture
// groups) in a string. The pattern's strategy is to start at the beginning of
// the string, and for every next dash-delimited group, consider the group to be
// a continuation of the package name, unless the group begins with a number or
// matches any of a specified set of "version-indicating" patterns. When a given
// group meets this criterion, consider the group and the remainder of the
// string to be the package version.
//
// Regex components of note:
//
// (?Ui) ... Sets the "U" and the "i" options for this Regex —— (ungreedy,
// and case-insensitive, respectively). "Ungreedy" is important so that the '*' that trails the package name
// component doesn't consume the rest of the string.
//
// [[:alpha:]][[:word:]]* ... Matches any word, and the word can include "word" characters (
// which includes numbers and underscores), but the first character of the word MUST be a letter.
//
// (?:\.[[:alpha:]][[:word:]]*)* ... This looks redundant, but it's not. It
// extends the previous pattern such that the net effect of both components is
// that words can also include a period and more words (thus, when combined, not
// only is "something" matched, but so is "com.prefix.thing"
//
// (?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?)) ...
// This match group covers the "version-indicating" patterns mentioned in the above description. Given the pipes (
// '|'), this functions as a series of 'OR'-joined conditions:
//
// \d.* ... "If it starts with a numeric digit, this is a version, no matter what follows."
// build\d*.* ... "If it starts with "build" and then a numeric digit immediately after, this is a version."
// rc?\d+(?:^[[:alpha:]].*)? ... "If it starts with "r" or "rc" and then one or more numeric digits immediately
// after, but no alpha characters right after that (in the same word), this is a version."
//
// Match examples:
// some-package-4.0.1 --> name="some-package", version="4.0.1"
// prefix.thing-4 --> name="prefix.thing", version="4"
// my-http2-server-5 --> name="my-http2-server", version="5"
// jetpack-build235-rc5 --> name="jetpack", version="build2.0-rc5"
// ironman-r4-2009 --> name="ironman", version="r4-2009"
var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P<name>(?:[[:alpha:]][[:word:]]*(?:\.[[:alpha:]][[:word:]]*)*-?)+)(?:-(?P<version>(?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?))))?$`)
type archiveFilename struct {
raw string
fields []map[string]string
name string
version string
}
// TODO: Remove this method once we're using Go 1.15+.
//
// Go 1.15 introduces a `SubexpIndex` method for the Regexp type that would let
// this code be made more elegant. Once we've reached 1.15, we should eliminate
// this function in favor of that method.
func subexpIndex(re *regexp.Regexp, name string) int {
for i, subexpName := range re.SubexpNames() {
if subexpName == name {
return i
}
}
return -1
}
func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string {
if len(matches) < 1 {
log.Warnf("unexpectedly empty matches for archive '%s'", raw)
return ""
}
index := subexpIndex(re, subexpName)
if index < 1 {
log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw)
return ""
}
// Prevent out-of-range panic
if len(matches) < index+1 {
log.Warnf("no match found for '%s' in '%s'", subexpName, matches[0])
return ""
}
return matches[index]
}
func newJavaArchiveFilename(raw string) archiveFilename {
// trim the file extension and remove any path prefixes
name := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw))
cleanedFileName := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw))
matches := versionPattern.FindAllStringSubmatch(name, -1)
fields := make([]map[string]string, 0)
for _, match := range matches {
item := make(map[string]string)
for i, name := range versionPattern.SubexpNames() {
if i != 0 && name != "" {
item[name] = match[i]
}
}
fields = append(fields, item)
}
matches := nameAndVersionPattern.FindStringSubmatch(cleanedFileName)
name := getSubexp(matches, "name", nameAndVersionPattern, raw)
version := getSubexp(matches, "version", nameAndVersionPattern, raw)
return archiveFilename{
raw: raw,
fields: fields,
name: name,
version: version,
}
}
@ -66,21 +121,3 @@ func (a archiveFilename) pkgType() pkg.Type {
return pkg.UnknownPkg
}
}
func (a archiveFilename) version() string {
if len(a.fields) > 1 {
log.Warnf("discovered multiple name-version pairings from %q: %+v", a.raw, a.fields)
return ""
} else if len(a.fields) < 1 {
return ""
}
return a.fields[0]["version"]
}
func (a archiveFilename) name() string {
// derive the name from the archive name (no path or extension) and remove any versions found
basename := filepath.Base(a.raw)
cleaned := strings.TrimSuffix(basename, filepath.Ext(basename))
return versionAreaPattern.ReplaceAllString(cleaned, "")
}

View File

@ -131,24 +131,24 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) {
t.Errorf("mismatched type: %+v != %v", ty, test.ty)
}
version := obj.version()
version := obj.version
if version != test.version {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(version, test.version, true)
diffs := dmp.DiffMain(test.version, version, true)
t.Errorf("mismatched version:\n%s", dmp.DiffPrettyText(diffs))
}
extension := obj.extension()
if extension != test.extension {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(extension, test.extension, true)
diffs := dmp.DiffMain(test.extension, extension, true)
t.Errorf("mismatched extension:\n%s", dmp.DiffPrettyText(diffs))
}
name := obj.name()
name := obj.name
if name != test.name {
dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(name, test.name, true)
diffs := dmp.DiffMain(test.name, name, true)
t.Errorf("mismatched name:\n%s", dmp.DiffPrettyText(diffs))
}
})

View File

@ -303,7 +303,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-starter",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "jul-to-slf4j",
@ -315,7 +315,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-starter-validation",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "hibernate-validator",
@ -327,7 +327,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-expression",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "jakarta.validation-api",
@ -335,11 +335,11 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-web",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "spring-boot-starter-actuator",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "log4j-api",
@ -359,23 +359,23 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-aop",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "spring-boot-actuator-autoconfigure",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "spring-jcl",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "spring-boot",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "spring-boot-starter-logging",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "jakarta.annotation-api",
@ -383,7 +383,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-webmvc",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "HdrHistogram",
@ -391,7 +391,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-starter-web",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "logback-classic",
@ -403,7 +403,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-starter-json",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "jackson-databind",
@ -419,7 +419,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-autoconfigure",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "jackson-datatype-jdk8",
@ -435,11 +435,11 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-beans",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "spring-boot-actuator",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "slf4j-api",
@ -447,7 +447,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-core",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
{
Name: "logback-core",
@ -467,7 +467,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-boot-starter-tomcat",
Version: "2.2.2",
Version: "2.2.2.RELEASE",
},
{
Name: "classmate",
@ -475,7 +475,7 @@ func TestParseNestedJar(t *testing.T) {
},
{
Name: "spring-context",
Version: "5.2.2",
Version: "5.2.2.RELEASE",
},
},
},
@ -509,32 +509,28 @@ func TestParseNestedJar(t *testing.T) {
expectedNameVersionPairSet.Add(makeKey(&e))
}
if len(actual) != len(expectedNameVersionPairSet) {
actualNameVersionPairSet := internal.NewStringSet()
for _, a := range actual {
key := makeKey(&a)
actualNameVersionPairSet.Add(key)
if !expectedNameVersionPairSet.Contains(key) {
t.Logf("extra package: %s", a)
t.Errorf("extra package: %s", a)
}
}
for _, key := range expectedNameVersionPairSet.ToSlice() {
if !actualNameVersionPairSet.Contains(key) {
t.Logf("missing package: %s", key)
t.Errorf("missing package: %s", key)
}
}
if len(actual) != len(expectedNameVersionPairSet) {
t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet))
}
for _, a := range actual {
actualKey := makeKey(&a)
if !expectedNameVersionPairSet.Contains(actualKey) {
t.Errorf("unexpected pkg: %q", actualKey)
}
metadata := a.Metadata.(pkg.JavaMetadata)
if actualKey == "spring-boot|0.0.1-SNAPSHOT" {
if metadata.Parent != nil {

View File

@ -93,8 +93,8 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error)
func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
var name string
switch {
case filenameObj.name() != "":
name = filenameObj.name()
case filenameObj.name != "":
name = filenameObj.name
case manifest.Main["Name"] != "":
// Manifest original spec...
name = manifest.Main["Name"]
@ -117,8 +117,8 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string
func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
var version string
switch {
case filenameObj.version() != "":
version = filenameObj.version()
case filenameObj.version != "":
version = filenameObj.version
case manifest.Main["Implementation-Version"] != "":
version = manifest.Main["Implementation-Version"]
case manifest.Main["Specification-Version"] != "":