From 3e8bca69110ef5214790ad057b38714144a19fdf Mon Sep 17 00:00:00 2001 From: Dan Luhring Date: Sat, 14 Nov 2020 20:30:51 -0500 Subject: [PATCH] Rework Java archive name and version detection and clean up tests Signed-off-by: Dan Luhring --- syft/cataloger/java/archive_filename.go | 133 ++++++++++++------- syft/cataloger/java/archive_filename_test.go | 10 +- syft/cataloger/java/archive_parser_test.go | 72 +++++----- syft/cataloger/java/java_manifest.go | 8 +- 4 files changed, 128 insertions(+), 95 deletions(-) diff --git a/syft/cataloger/java/archive_filename.go b/syft/cataloger/java/archive_filename.go index b5d1d5675..b1b945334 100644 --- a/syft/cataloger/java/archive_filename.go +++ b/syft/cataloger/java/archive_filename.go @@ -10,45 +10,100 @@ import ( "github.com/anchore/syft/syft/pkg" ) -// match on versions and anything after the version. This is used to isolate the name from the version. -// match examples: -// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar -// windows-remote-command-1.0.jar ---> -1.0.jar -// wstx-asl-1-2.jar ---> -1-2.jar -// guava-rc0.jar ---> -rc0.jar -var versionAreaPattern = regexp.MustCompile(`-(?P(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P.*)$`) - -// match on explicit versions. This is used for extracting version information. -// match examples: -// pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1) -// pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1) -// pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3) -var versionPattern = regexp.MustCompile(`-(?P(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`) +// nameAndVersionPattern finds the package name and version (as named capture +// groups) in a string. The pattern's strategy is to start at the beginning of +// the string, and for every next dash-delimited group, consider the group to be +// a continuation of the package name, unless the group begins with a number or +// matches any of a specified set of "version-indicating" patterns. When a given +// group meets this criterion, consider the group and the remainder of the +// string to be the package version. +// +// Regex components of note: +// +// (?Ui) ... Sets the "U" and the "i" options for this Regex —— (ungreedy, +// and case-insensitive, respectively). "Ungreedy" is important so that the '*' that trails the package name +// component doesn't consume the rest of the string. +// +// [[:alpha:]][[:word:]]* ... Matches any word, and the word can include "word" characters ( +// which includes numbers and underscores), but the first character of the word MUST be a letter. +// +// (?:\.[[:alpha:]][[:word:]]*)* ... This looks redundant, but it's not. It +// extends the previous pattern such that the net effect of both components is +// that words can also include a period and more words (thus, when combined, not +// only is "something" matched, but so is "com.prefix.thing" +// +// (?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?)) ... +// This match group covers the "version-indicating" patterns mentioned in the above description. Given the pipes ( +// '|'), this functions as a series of 'OR'-joined conditions: +// +// \d.* ... "If it starts with a numeric digit, this is a version, no matter what follows." +// build\d*.* ... "If it starts with "build" and then a numeric digit immediately after, this is a version." +// rc?\d+(?:^[[:alpha:]].*)? ... "If it starts with "r" or "rc" and then one or more numeric digits immediately +// after, but no alpha characters right after that (in the same word), this is a version." +// +// Match examples: +// some-package-4.0.1 --> name="some-package", version="4.0.1" +// prefix.thing-4 --> name="prefix.thing", version="4" +// my-http2-server-5 --> name="my-http2-server", version="5" +// jetpack-build235-rc5 --> name="jetpack", version="build2.0-rc5" +// ironman-r4-2009 --> name="ironman", version="r4-2009" +var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P(?:[[:alpha:]][[:word:]]*(?:\.[[:alpha:]][[:word:]]*)*-?)+)(?:-(?P(?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?))))?$`) type archiveFilename struct { - raw string - fields []map[string]string + raw string + name string + version string +} + +// TODO: Remove this method once we're using Go 1.15+. +// +// Go 1.15 introduces a `SubexpIndex` method for the Regexp type that would let +// this code be made more elegant. Once we've reached 1.15, we should eliminate +// this function in favor of that method. +func subexpIndex(re *regexp.Regexp, name string) int { + for i, subexpName := range re.SubexpNames() { + if subexpName == name { + return i + } + } + + return -1 +} + +func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string { + if len(matches) < 1 { + log.Warnf("unexpectedly empty matches for archive '%s'", raw) + return "" + } + + index := subexpIndex(re, subexpName) + if index < 1 { + log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw) + return "" + } + + // Prevent out-of-range panic + if len(matches) < index+1 { + log.Warnf("no match found for '%s' in '%s'", subexpName, matches[0]) + return "" + } + + return matches[index] } func newJavaArchiveFilename(raw string) archiveFilename { // trim the file extension and remove any path prefixes - name := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw)) + cleanedFileName := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw)) - matches := versionPattern.FindAllStringSubmatch(name, -1) - fields := make([]map[string]string, 0) - for _, match := range matches { - item := make(map[string]string) - for i, name := range versionPattern.SubexpNames() { - if i != 0 && name != "" { - item[name] = match[i] - } - } - fields = append(fields, item) - } + matches := nameAndVersionPattern.FindStringSubmatch(cleanedFileName) + + name := getSubexp(matches, "name", nameAndVersionPattern, raw) + version := getSubexp(matches, "version", nameAndVersionPattern, raw) return archiveFilename{ - raw: raw, - fields: fields, + raw: raw, + name: name, + version: version, } } @@ -66,21 +121,3 @@ func (a archiveFilename) pkgType() pkg.Type { return pkg.UnknownPkg } } - -func (a archiveFilename) version() string { - if len(a.fields) > 1 { - log.Warnf("discovered multiple name-version pairings from %q: %+v", a.raw, a.fields) - return "" - } else if len(a.fields) < 1 { - return "" - } - - return a.fields[0]["version"] -} - -func (a archiveFilename) name() string { - // derive the name from the archive name (no path or extension) and remove any versions found - basename := filepath.Base(a.raw) - cleaned := strings.TrimSuffix(basename, filepath.Ext(basename)) - return versionAreaPattern.ReplaceAllString(cleaned, "") -} diff --git a/syft/cataloger/java/archive_filename_test.go b/syft/cataloger/java/archive_filename_test.go index 972e6fb6f..917ff58bb 100644 --- a/syft/cataloger/java/archive_filename_test.go +++ b/syft/cataloger/java/archive_filename_test.go @@ -131,24 +131,24 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) { t.Errorf("mismatched type: %+v != %v", ty, test.ty) } - version := obj.version() + version := obj.version if version != test.version { dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(version, test.version, true) + diffs := dmp.DiffMain(test.version, version, true) t.Errorf("mismatched version:\n%s", dmp.DiffPrettyText(diffs)) } extension := obj.extension() if extension != test.extension { dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(extension, test.extension, true) + diffs := dmp.DiffMain(test.extension, extension, true) t.Errorf("mismatched extension:\n%s", dmp.DiffPrettyText(diffs)) } - name := obj.name() + name := obj.name if name != test.name { dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(name, test.name, true) + diffs := dmp.DiffMain(test.name, name, true) t.Errorf("mismatched name:\n%s", dmp.DiffPrettyText(diffs)) } }) diff --git a/syft/cataloger/java/archive_parser_test.go b/syft/cataloger/java/archive_parser_test.go index 0586ee5fe..41b9b15ca 100644 --- a/syft/cataloger/java/archive_parser_test.go +++ b/syft/cataloger/java/archive_parser_test.go @@ -303,7 +303,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "jul-to-slf4j", @@ -315,7 +315,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-validation", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "hibernate-validator", @@ -327,7 +327,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-expression", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "jakarta.validation-api", @@ -335,11 +335,11 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-web", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "spring-boot-starter-actuator", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "log4j-api", @@ -359,23 +359,23 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-aop", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "spring-boot-actuator-autoconfigure", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "spring-jcl", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "spring-boot", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "spring-boot-starter-logging", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "jakarta.annotation-api", @@ -383,7 +383,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-webmvc", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "HdrHistogram", @@ -391,7 +391,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-web", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "logback-classic", @@ -403,7 +403,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-json", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "jackson-databind", @@ -419,7 +419,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-autoconfigure", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "jackson-datatype-jdk8", @@ -435,11 +435,11 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-beans", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "spring-boot-actuator", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "slf4j-api", @@ -447,7 +447,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-core", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, { Name: "logback-core", @@ -467,7 +467,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-tomcat", - Version: "2.2.2", + Version: "2.2.2.RELEASE", }, { Name: "classmate", @@ -475,7 +475,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-context", - Version: "5.2.2", + Version: "5.2.2.RELEASE", }, }, }, @@ -509,32 +509,28 @@ func TestParseNestedJar(t *testing.T) { expectedNameVersionPairSet.Add(makeKey(&e)) } + actualNameVersionPairSet := internal.NewStringSet() + for _, a := range actual { + key := makeKey(&a) + actualNameVersionPairSet.Add(key) + if !expectedNameVersionPairSet.Contains(key) { + t.Errorf("extra package: %s", a) + } + } + + for _, key := range expectedNameVersionPairSet.ToSlice() { + if !actualNameVersionPairSet.Contains(key) { + t.Errorf("missing package: %s", key) + } + } + if len(actual) != len(expectedNameVersionPairSet) { - actualNameVersionPairSet := internal.NewStringSet() - for _, a := range actual { - key := makeKey(&a) - actualNameVersionPairSet.Add(key) - if !expectedNameVersionPairSet.Contains(key) { - t.Logf("extra package: %s", a) - } - } - - for _, key := range expectedNameVersionPairSet.ToSlice() { - if !actualNameVersionPairSet.Contains(key) { - t.Logf("missing package: %s", key) - } - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet)) } for _, a := range actual { actualKey := makeKey(&a) - if !expectedNameVersionPairSet.Contains(actualKey) { - t.Errorf("unexpected pkg: %q", actualKey) - } - metadata := a.Metadata.(pkg.JavaMetadata) if actualKey == "spring-boot|0.0.1-SNAPSHOT" { if metadata.Parent != nil { diff --git a/syft/cataloger/java/java_manifest.go b/syft/cataloger/java/java_manifest.go index dac44384e..ba44aec58 100644 --- a/syft/cataloger/java/java_manifest.go +++ b/syft/cataloger/java/java_manifest.go @@ -93,8 +93,8 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { var name string switch { - case filenameObj.name() != "": - name = filenameObj.name() + case filenameObj.name != "": + name = filenameObj.name case manifest.Main["Name"] != "": // Manifest original spec... name = manifest.Main["Name"] @@ -117,8 +117,8 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { var version string switch { - case filenameObj.version() != "": - version = filenameObj.version() + case filenameObj.version != "": + version = filenameObj.version case manifest.Main["Implementation-Version"] != "": version = manifest.Main["Implementation-Version"] case manifest.Main["Specification-Version"] != "":