diff --git a/syft/cataloger/java/archive_filename.go b/syft/cataloger/java/archive_filename.go index 0f50d99c5..85c4621cd 100644 --- a/syft/cataloger/java/archive_filename.go +++ b/syft/cataloger/java/archive_filename.go @@ -10,11 +10,20 @@ import ( "github.com/anchore/syft/syft/pkg" ) +// match on versions and anything after the version. This is used to isolate the name from the version. +// match examples: +// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar +// windows-remote-command-1.0.jar ---> -1.0.jar +// wstx-asl-1-2.jar ---> -1-2.jar +// guava-rc0.jar ---> -rc0.jar +var versionAreaPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P<remaining>.*)$`) + +// match on explicit versions. This is used for extracting version information. // match examples: // pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1) // pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1) // pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3) -var versionPattern = regexp.MustCompile(`(?P<name>.+)-(?P<version>(\d+\.)?(\d+\.)?(\*|\d+)(-[a-zA-Z0-9\-\.]+)*)`) +var versionPattern = regexp.MustCompile(`-(?P<version>(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`) type archiveFilename struct { raw string @@ -70,14 +79,8 @@ func (a archiveFilename) version() string { } func (a archiveFilename) name() string { - for _, fieldSet := range a.fields { - if name, ok := fieldSet["name"]; ok { - // return the first name - return name - } - } - - // derive the name from the archive name (no path or extension) + // derive the name from the archive name (no path or extension) and remove any versions found basename := filepath.Base(a.raw) - return strings.TrimSuffix(basename, filepath.Ext(basename)) + cleaned := strings.TrimSuffix(basename, filepath.Ext(basename)) + return versionAreaPattern.ReplaceAllString(cleaned, "") } diff --git a/syft/cataloger/java/archive_filename_test.go b/syft/cataloger/java/archive_filename_test.go index 
db79e61d9..553ce9c41 100644 --- a/syft/cataloger/java/archive_filename_test.go +++ b/syft/cataloger/java/archive_filename_test.go @@ -1,9 +1,10 @@ package java import ( + "testing" + "github.com/anchore/syft/syft/pkg" "github.com/sergi/go-diff/diffmatchpatch" - "testing" ) func TestExtractInfoFromJavaArchiveFilename(t *testing.T) { @@ -56,12 +57,78 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) { name: "pkg-extra-field-maven", ty: pkg.JenkinsPluginPkg, }, + { + filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3a-hudson.jar", + version: "1.0.2-beta-2.2.3a-hudson", + extension: "jar", + name: "wagon-webdav", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3-hudson.jar", + version: "1.0.2-beta-2.2.3-hudson", + extension: "jar", + name: "wagon-webdav", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/windows-remote-command-1.0.jar", + version: "1.0", + extension: "jar", + name: "windows-remote-command", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/wagon-http-lightweight-1.0.5-beta-2.jar", + version: "1.0.5-beta-2", + extension: "jar", + name: "wagon-http-lightweight", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/commons-jelly-1.1-hudson-20100305.jar", + version: "1.1-hudson-20100305", + extension: "jar", + name: "commons-jelly", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/jtidy-4aug2000r7-dev-hudson-1.jar", + // I don't see how we can reliably account for this case + //version: "4aug2000r7-dev-hudson-1", + version: "", + extension: "jar", + name: "jtidy", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/trilead-ssh2-build212-hudson-5.jar", + // I don't see how we can reliably account for this case + //version: "build212-hudson-5", + version: "5", + extension: "jar", + // name: "trilead-ssh2", + name: "trilead-ssh2-build212-hudson", + ty: pkg.JavaPkg, + }, + { + filename: 
"/hudson.war:WEB-INF/lib/guava-r06.jar", + version: "r06", + extension: "jar", + name: "guava", + ty: pkg.JavaPkg, + }, } for _, test := range tests { t.Run(test.filename, func(t *testing.T) { obj := newJavaArchiveFilename(test.filename) + ty := obj.pkgType() + if ty != test.ty { + t.Errorf("mismatched type: %+v != %v", ty, test.ty) + } + version := obj.version() if version != test.version { dmp := diffmatchpatch.New() diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index fa9e619cd..72f0ff79b 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -66,13 +66,17 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) } + // fetch the last element of the virtual path + virtualElements := strings.Split(virtualPath, ":") + currentFilepath := virtualElements[len(virtualElements)-1] + return &archiveParser{ discoveredPkgs: internal.NewStringSet(), fileManifest: fileManifest, virtualPath: virtualPath, archivePath: archivePath, contentPath: contentPath, - fileInfo: newJavaArchiveFilename(virtualPath), + fileInfo: newJavaArchiveFilename(currentFilepath), detectNested: detectNested, }, cleanupFn, nil } @@ -182,6 +186,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ if !strings.HasPrefix(propsObj.ArtifactID, parentPkg.Name) { vPathSuffix += ":" + propsObj.ArtifactID } + virtualPath := j.virtualPath + vPathSuffix // discovered props = new package p := pkg.Package{ @@ -191,7 +196,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: j.virtualPath + vPathSuffix, + VirtualPath: virtualPath, PomProperties: propsObj, Parent: parentPkg, }, @@ -199,16 +204,24 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg 
*pkg.Package) ([ pkgKey := uniquePkgKey(&p) - if !j.discoveredPkgs.Contains(pkgKey) { - // only keep packages we haven't seen yet - pkgs = append(pkgs, p) - } else if pkgKey == parentKey { + if pkgKey == parentKey || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath || len(contents) == 1 { // we've run across more information about our parent package, add this info to the parent package metadata + // the pom properties is typically a better source of information for name and version than the manifest + if p.Name != parentPkg.Name { + parentPkg.Name = p.Name + } + if p.Version != parentPkg.Version { + parentPkg.Version = p.Version + } + parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata) if ok { parentMetadata.PomProperties = propsObj parentPkg.Metadata = parentMetadata } + } else if !j.discoveredPkgs.Contains(pkgKey) { + // only keep packages we haven't seen yet (and are not related to the parent package) + pkgs = append(pkgs, p) } } } diff --git a/syft/cataloger/java/java_manifest.go b/syft/cataloger/java/java_manifest.go index c4ca715e2..2eaf8aac2 100644 --- a/syft/cataloger/java/java_manifest.go +++ b/syft/cataloger/java/java_manifest.go @@ -70,7 +70,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) if len(sections) > 0 { manifest.Main = sections[0] if len(sections) > 1 { - manifest.Sections = make(map[string]map[string]string) + manifest.NamedSections = make(map[string]map[string]string) for i, s := range sections[1:] { name, ok := s["Name"] if !ok { @@ -82,7 +82,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) } else { delete(s, "Name") } - manifest.Sections[name] = s + manifest.NamedSections[name] = s } } } @@ -117,10 +117,10 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { var version string switch { - case manifest.Main["Implementation-Version"] 
!= "": - version = manifest.Main["Implementation-Version"] case filenameObj.version() != "": version = filenameObj.version() + case manifest.Main["Implementation-Version"] != "": + version = manifest.Main["Implementation-Version"] case manifest.Main["Specification-Version"] != "": version = manifest.Main["Specification-Version"] case manifest.Main["Plugin-Version"] != "": diff --git a/syft/cataloger/java/java_manifest_test.go b/syft/cataloger/java/java_manifest_test.go index fd6d096fb..727346dc6 100644 --- a/syft/cataloger/java/java_manifest_test.go +++ b/syft/cataloger/java/java_manifest_test.go @@ -45,7 +45,7 @@ func TestParseJavaManifest(t *testing.T) { "Archiver-Version": "Plexus Archiver", "Created-By": "Apache Maven 3.6.3", }, - Sections: map[string]map[string]string{ + NamedSections: map[string]map[string]string{ "thing-1": { "Built-By": "?", }, diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index a4a9c7578..d4852eb55 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -22,8 +22,8 @@ type PomProperties struct { // JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file. type JavaManifest struct { - Main map[string]string `json:"main,omitempty"` - Sections map[string]map[string]string `json:"sections,omitempty"` + Main map[string]string `json:"main,omitempty"` + NamedSections map[string]map[string]string `json:"namedSections,omitempty"` } func (m JavaMetadata) PackageURL() string {