From 31e0fc36e3a66b39363726ce2db760ebe99b5762 Mon Sep 17 00:00:00 2001 From: William Murphy Date: Wed, 31 Jan 2024 08:21:33 -0500 Subject: [PATCH] fix: Better test for group ID in filename (#2565) This fixes an issue where filenames containing a period that aren't a group ID, such as some-jar.12.jar, would mistakenly be reported as having the name "12" by syft, instead of the name "some-jar.12". It works by testing whether the parts of the filename split on "." are all valid Java identifiers. Signed-off-by: Will Murphy --- .../pkg/cataloger/java/parse_java_manifest.go | 30 ++++++++++++++++++- .../java/parse_java_manifest_test.go | 16 ++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/syft/pkg/cataloger/java/parse_java_manifest.go b/syft/pkg/cataloger/java/parse_java_manifest.go index 50088111f..3f9ca8cdb 100644 --- a/syft/pkg/cataloger/java/parse_java_manifest.go +++ b/syft/pkg/cataloger/java/parse_java_manifest.go @@ -6,6 +6,7 @@ import ( "io" "strconv" "strings" + "unicode" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" @@ -162,13 +163,40 @@ func extractNameFromArchiveFilename(a archiveFilename) string { return a.name } + // Maybe the filename is like groupid + . + artifactid. If so, return artifact id. 
fields := strings.Split(a.name, ".") - return fields[len(fields)-1] + maybeGroupID := true + for _, f := range fields { + if !isValidJavaIdentifier(f) { + maybeGroupID = false + break + } + } + if maybeGroupID { + return fields[len(fields)-1] + } } return a.name } +func isValidJavaIdentifier(field string) bool { + runes := []rune(field) + if len(runes) == 0 { + return false + } + // check whether first rune can start an identifier name in Java + // Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] + // see https://developer.classpath.org/doc/java/lang/Character-source.html + // line 3295 + r := runes[0] + return unicode.Is(unicode.Lu, r) || + unicode.Is(unicode.Ll, r) || unicode.Is(unicode.Lt, r) || + unicode.Is(unicode.Lm, r) || unicode.Is(unicode.Lo, r) || + unicode.Is(unicode.Nl, r) || + unicode.Is(unicode.Sc, r) || unicode.Is(unicode.Pc, r) +} + func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { name := extractNameFromApacheMavenBundlePlugin(manifest) if name != "" { diff --git a/syft/pkg/cataloger/java/parse_java_manifest_test.go b/syft/pkg/cataloger/java/parse_java_manifest_test.go index 7bca35bd8..cdd7f4864 100644 --- a/syft/pkg/cataloger/java/parse_java_manifest_test.go +++ b/syft/pkg/cataloger/java/parse_java_manifest_test.go @@ -209,6 +209,22 @@ func TestSelectName(t *testing.T) { archive: newJavaArchiveFilename("/something/com.atlassian.gadgets.atlassian-gadgets-api.jar"), expected: "atlassian-gadgets-api", }, + { + desc: "Filename has period that is not groupid + artifact id", + manifest: pkg.JavaManifest{ + Main: map[string]string{}, + }, + archive: newJavaArchiveFilename("/something/http4s-crypto_2.12-0.1.0.jar"), + expected: "http4s-crypto_2.12", + }, + { + desc: "Filename has period that is not groupid + artifact id, kafka", + manifest: pkg.JavaManifest{ + Main: map[string]string{}, + }, + archive: newJavaArchiveFilename("/something//kafka_2.13-3.2.2.jar"), + expected: "kafka_2.13", // see 
https://mvnrepository.com/artifact/org.apache.kafka/kafka_2.13/3.2.2 + }, { desc: "Skip stripping groupId prefix from archive filename for org.eclipse", manifest: pkg.JavaManifest{