Rework Java archive name and version detection and clean up tests

Signed-off-by: Dan Luhring <dan.luhring@anchore.com>
This commit is contained in:
Dan Luhring 2020-11-14 20:30:51 -05:00
parent 5afdd574a8
commit 3e8bca6911
No known key found for this signature in database
GPG Key ID: 9CEE23D079426CEF
4 changed files with 128 additions and 95 deletions

View File

@ -10,45 +10,100 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
// match on versions and anything after the version. This is used to isolate the name from the version. // nameAndVersionPattern finds the package name and version (as named capture
// match examples: // groups) in a string. The pattern's strategy is to start at the beginning of
// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar // the string, and for every next dash-delimited group, consider the group to be
// windows-remote-command-1.0.jar ---> -1.0.jar // a continuation of the package name, unless the group begins with a number or
// wstx-asl-1-2.jar ---> -1-2.jar // matches any of a specified set of "version-indicating" patterns. When a given
// guava-rc0.jar ---> -rc0.jar // group meets this criterion, consider the group and the remainder of the
var versionAreaPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P<remaining>.*)$`) // string to be the package version.
//
// match on explicit versions. This is used for extracting version information. // Regex components of note:
// match examples: //
// pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1) // (?Ui) ... Sets the "U" and the "i" options for this Regex —— (ungreedy,
// pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1) // and case-insensitive, respectively). "Ungreedy" is important so that the '*' that trails the package name
// pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3) // component doesn't consume the rest of the string.
var versionPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`) //
// [[:alpha:]][[:word:]]* ... Matches any word, and the word can include "word" characters (
// which includes numbers and underscores), but the first character of the word MUST be a letter.
//
// (?:\.[[:alpha:]][[:word:]]*)* ... This looks redundant, but it's not. It
// extends the previous pattern such that the net effect of both components is
// that words can also include a period and more words (thus, when combined, not
// only is "something" matched, but so is "com.prefix.thing").
//
// (?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:[^[:alpha:]].*)?)) ...
// This match group covers the "version-indicating" patterns mentioned in the
// above description. Given the pipes ('|'), this functions as a series of
// 'OR'-joined conditions:
//
// \d.* ... "If it starts with a numeric digit, this is a version, no matter what follows."
// build\d*.* ... "If it starts with "build" (optionally followed by numeric digits), this is a version,
// no matter what follows."
// rc?\d+(?:[^[:alpha:]].*)? ... "If it starts with "r" or "rc" and then one or more numeric digits immediately
// after, but no alpha characters right after that (in the same word), this is a version."
//
// The "no alpha characters" rule must be written as a negated bracket
// expression ("[^[:alpha:]]"). In Go's regexp syntax a '^' outside of brackets
// is a start-of-text anchor, which could never match in the middle of the
// string and would silently disable everything after the digits.
//
// Match examples:
// some-package-4.0.1 --> name="some-package", version="4.0.1"
// prefix.thing-4 --> name="prefix.thing", version="4"
// my-http2-server-5 --> name="my-http2-server", version="5"
// jetpack-build235-rc5 --> name="jetpack", version="build235-rc5"
// ironman-r4-2009 --> name="ironman", version="r4-2009"
// spring-data-r2dbc-1.1.0 --> name="spring-data-r2dbc", version="1.1.0"
var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P<name>(?:[[:alpha:]][[:word:]]*(?:\.[[:alpha:]][[:word:]]*)*-?)+)(?:-(?P<version>(?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:[^[:alpha:]].*)?))))?$`)
type archiveFilename struct { type archiveFilename struct {
raw string raw string
fields []map[string]string name string
version string
}
// subexpIndex returns the index of the first capture group in re whose name is
// name, or -1 if name is empty or no capture group has that name. A returned
// index can be used directly against the slice produced by
// re.FindStringSubmatch.
//
// TODO: Remove this function once we're using Go 1.15+.
//
// Go 1.15 introduces a `SubexpIndex` method for the Regexp type that would let
// this code be made more elegant. Once we've reached 1.15, we should eliminate
// this function in favor of that method.
func subexpIndex(re *regexp.Regexp, name string) int {
	// SubexpNames reports "" for the whole-match slot (index 0) and for every
	// unnamed group, so an empty name must never be treated as a hit. This
	// mirrors the behavior of the standard library's SubexpIndex.
	if name == "" {
		return -1
	}
	for i, subexpName := range re.SubexpNames() {
		if subexpName == name {
			return i
		}
	}
	return -1
}
// getSubexp returns the text captured by the capture group named subexpName.
//
// matches is a submatch slice for re (the caller in this file passes the result
// of re.FindStringSubmatch), re is the pattern that produced it, and raw is the
// original archive path, used only in warning messages.
//
// An empty string is returned, and a warning is logged, when matches is empty,
// when subexpName does not resolve to a capture group index of 1 or greater, or
// when matches is too short to contain that index.
func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string {
// No submatches at all: there is nothing to extract from.
if len(matches) < 1 {
log.Warnf("unexpectedly empty matches for archive '%s'", raw)
return ""
}
index := subexpIndex(re, subexpName)
// Index 0 is the whole match and -1 means "not found"; neither is a usable
// named capture group.
if index < 1 {
log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw)
return ""
}
// Prevent out-of-range panic
if len(matches) < index+1 {
log.Warnf("no match found for '%s' in '%s'", subexpName, matches[0])
return ""
}
return matches[index]
} }
func newJavaArchiveFilename(raw string) archiveFilename { func newJavaArchiveFilename(raw string) archiveFilename {
// trim the file extension and remove any path prefixes // trim the file extension and remove any path prefixes
name := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw)) cleanedFileName := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw))
matches := versionPattern.FindAllStringSubmatch(name, -1) matches := nameAndVersionPattern.FindStringSubmatch(cleanedFileName)
fields := make([]map[string]string, 0)
for _, match := range matches { name := getSubexp(matches, "name", nameAndVersionPattern, raw)
item := make(map[string]string) version := getSubexp(matches, "version", nameAndVersionPattern, raw)
for i, name := range versionPattern.SubexpNames() {
if i != 0 && name != "" {
item[name] = match[i]
}
}
fields = append(fields, item)
}
return archiveFilename{ return archiveFilename{
raw: raw, raw: raw,
fields: fields, name: name,
version: version,
} }
} }
@ -66,21 +121,3 @@ func (a archiveFilename) pkgType() pkg.Type {
return pkg.UnknownPkg return pkg.UnknownPkg
} }
} }
func (a archiveFilename) version() string {
if len(a.fields) > 1 {
log.Warnf("discovered multiple name-version pairings from %q: %+v", a.raw, a.fields)
return ""
} else if len(a.fields) < 1 {
return ""
}
return a.fields[0]["version"]
}
func (a archiveFilename) name() string {
// derive the name from the archive name (no path or extension) and remove any versions found
basename := filepath.Base(a.raw)
cleaned := strings.TrimSuffix(basename, filepath.Ext(basename))
return versionAreaPattern.ReplaceAllString(cleaned, "")
}

View File

@ -131,24 +131,24 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) {
t.Errorf("mismatched type: %+v != %v", ty, test.ty) t.Errorf("mismatched type: %+v != %v", ty, test.ty)
} }
version := obj.version() version := obj.version
if version != test.version { if version != test.version {
dmp := diffmatchpatch.New() dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(version, test.version, true) diffs := dmp.DiffMain(test.version, version, true)
t.Errorf("mismatched version:\n%s", dmp.DiffPrettyText(diffs)) t.Errorf("mismatched version:\n%s", dmp.DiffPrettyText(diffs))
} }
extension := obj.extension() extension := obj.extension()
if extension != test.extension { if extension != test.extension {
dmp := diffmatchpatch.New() dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(extension, test.extension, true) diffs := dmp.DiffMain(test.extension, extension, true)
t.Errorf("mismatched extension:\n%s", dmp.DiffPrettyText(diffs)) t.Errorf("mismatched extension:\n%s", dmp.DiffPrettyText(diffs))
} }
name := obj.name() name := obj.name
if name != test.name { if name != test.name {
dmp := diffmatchpatch.New() dmp := diffmatchpatch.New()
diffs := dmp.DiffMain(name, test.name, true) diffs := dmp.DiffMain(test.name, name, true)
t.Errorf("mismatched name:\n%s", dmp.DiffPrettyText(diffs)) t.Errorf("mismatched name:\n%s", dmp.DiffPrettyText(diffs))
} }
}) })

View File

@ -303,7 +303,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-starter", Name: "spring-boot-starter",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "jul-to-slf4j", Name: "jul-to-slf4j",
@ -315,7 +315,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-starter-validation", Name: "spring-boot-starter-validation",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "hibernate-validator", Name: "hibernate-validator",
@ -327,7 +327,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-expression", Name: "spring-expression",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "jakarta.validation-api", Name: "jakarta.validation-api",
@ -335,11 +335,11 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-web", Name: "spring-web",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "spring-boot-starter-actuator", Name: "spring-boot-starter-actuator",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "log4j-api", Name: "log4j-api",
@ -359,23 +359,23 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-aop", Name: "spring-aop",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "spring-boot-actuator-autoconfigure", Name: "spring-boot-actuator-autoconfigure",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "spring-jcl", Name: "spring-jcl",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "spring-boot", Name: "spring-boot",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "spring-boot-starter-logging", Name: "spring-boot-starter-logging",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "jakarta.annotation-api", Name: "jakarta.annotation-api",
@ -383,7 +383,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-webmvc", Name: "spring-webmvc",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "HdrHistogram", Name: "HdrHistogram",
@ -391,7 +391,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-starter-web", Name: "spring-boot-starter-web",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "logback-classic", Name: "logback-classic",
@ -403,7 +403,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-starter-json", Name: "spring-boot-starter-json",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "jackson-databind", Name: "jackson-databind",
@ -419,7 +419,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-autoconfigure", Name: "spring-boot-autoconfigure",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "jackson-datatype-jdk8", Name: "jackson-datatype-jdk8",
@ -435,11 +435,11 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-beans", Name: "spring-beans",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "spring-boot-actuator", Name: "spring-boot-actuator",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "slf4j-api", Name: "slf4j-api",
@ -447,7 +447,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-core", Name: "spring-core",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
{ {
Name: "logback-core", Name: "logback-core",
@ -467,7 +467,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-boot-starter-tomcat", Name: "spring-boot-starter-tomcat",
Version: "2.2.2", Version: "2.2.2.RELEASE",
}, },
{ {
Name: "classmate", Name: "classmate",
@ -475,7 +475,7 @@ func TestParseNestedJar(t *testing.T) {
}, },
{ {
Name: "spring-context", Name: "spring-context",
Version: "5.2.2", Version: "5.2.2.RELEASE",
}, },
}, },
}, },
@ -509,32 +509,28 @@ func TestParseNestedJar(t *testing.T) {
expectedNameVersionPairSet.Add(makeKey(&e)) expectedNameVersionPairSet.Add(makeKey(&e))
} }
actualNameVersionPairSet := internal.NewStringSet()
for _, a := range actual {
key := makeKey(&a)
actualNameVersionPairSet.Add(key)
if !expectedNameVersionPairSet.Contains(key) {
t.Errorf("extra package: %s", a)
}
}
for _, key := range expectedNameVersionPairSet.ToSlice() {
if !actualNameVersionPairSet.Contains(key) {
t.Errorf("missing package: %s", key)
}
}
if len(actual) != len(expectedNameVersionPairSet) { if len(actual) != len(expectedNameVersionPairSet) {
actualNameVersionPairSet := internal.NewStringSet()
for _, a := range actual {
key := makeKey(&a)
actualNameVersionPairSet.Add(key)
if !expectedNameVersionPairSet.Contains(key) {
t.Logf("extra package: %s", a)
}
}
for _, key := range expectedNameVersionPairSet.ToSlice() {
if !actualNameVersionPairSet.Contains(key) {
t.Logf("missing package: %s", key)
}
}
t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet)) t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet))
} }
for _, a := range actual { for _, a := range actual {
actualKey := makeKey(&a) actualKey := makeKey(&a)
if !expectedNameVersionPairSet.Contains(actualKey) {
t.Errorf("unexpected pkg: %q", actualKey)
}
metadata := a.Metadata.(pkg.JavaMetadata) metadata := a.Metadata.(pkg.JavaMetadata)
if actualKey == "spring-boot|0.0.1-SNAPSHOT" { if actualKey == "spring-boot|0.0.1-SNAPSHOT" {
if metadata.Parent != nil { if metadata.Parent != nil {

View File

@ -93,8 +93,8 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error)
func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
var name string var name string
switch { switch {
case filenameObj.name() != "": case filenameObj.name != "":
name = filenameObj.name() name = filenameObj.name
case manifest.Main["Name"] != "": case manifest.Main["Name"] != "":
// Manifest original spec... // Manifest original spec...
name = manifest.Main["Name"] name = manifest.Main["Name"]
@ -117,8 +117,8 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string
func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
var version string var version string
switch { switch {
case filenameObj.version() != "": case filenameObj.version != "":
version = filenameObj.version() version = filenameObj.version
case manifest.Main["Implementation-Version"] != "": case manifest.Main["Implementation-Version"] != "":
version = manifest.Main["Implementation-Version"] version = manifest.Main["Implementation-Version"]
case manifest.Main["Specification-Version"] != "": case manifest.Main["Specification-Version"] != "":