From 51d38f8e592dbea672954c9d5c6c9f4bffc97b9b Mon Sep 17 00:00:00 2001 From: William Murphy Date: Thu, 31 Aug 2023 10:19:55 -0400 Subject: [PATCH] fix: in some cases, try to use pom info to guess name and version to top level jar (#2080) Otherwise, small renames like 'hudson-war-2.2.1.war' to 'hudson.war' would cause syft to incorrectly catalog the archive. Signed-off-by: Will Murphy --- syft/pkg/cataloger/java/archive_parser.go | 51 +++++++++++++++++-- .../regression_java_virtualpath_test.go | 36 +++++++++++++ .../Dockerfile | 7 +++ 3 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 test/integration/regression_java_virtualpath_test.go create mode 100644 test/integration/test-fixtures/image-java-virtualpath-regression/Dockerfile diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index a9662be65..f6570cb79 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -149,7 +149,6 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error) // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { // search and parse java manifest files - // TODO: do we want to prefer or check for pom files over manifest here? manifestMatches := j.fileManifest.GlobMatch(manifestGlob) if len(manifestMatches) > 1 { return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches) @@ -186,9 +185,24 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { // we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...) + /* + We should name and version from, in this order: + 1. pom.properties if we find exactly 1 + 2. pom.xml if we find exactly 1 + 3. 
manifest + 4. filename + */ + name, version := j.guessMainPackageNameAndVersionFromPomInfo() + if name == "" { + name = selectName(manifest, j.fileInfo) + } + if version == "" { + version = selectVersion(manifest, j.fileInfo) + } return &pkg.Package{ - Name: selectName(manifest, j.fileInfo), - Version: selectVersion(manifest, j.fileInfo), + // TODO: maybe select name should just have a pom properties in it? + Name: name, + Version: version, Language: pkg.Java, Licenses: pkg.NewLicenseSet(licenses...), Locations: file.NewLocationSet( @@ -204,6 +218,37 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { }, nil } +func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, string) { + pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob) + pomMatches := j.fileManifest.GlobMatch(pomXMLGlob) + var pomPropertiesObject pkg.PomProperties + var pomProjectObject pkg.PomProject + if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 { + // we have exactly 1 pom.properties or pom.xml in the archive; assume it represents the + // package we're scanning if the names seem like a plausible match + properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, pomPropertyMatches) + projects, _ := pomProjectByParentPath(j.archivePath, j.location, pomMatches) + + for parentPath, propertiesObj := range properties { + if propertiesObj.ArtifactID != "" && j.fileInfo.name != "" && strings.HasPrefix(propertiesObj.ArtifactID, j.fileInfo.name) { + pomPropertiesObject = propertiesObj + if proj, exists := projects[parentPath]; exists { + pomProjectObject = proj + } + } + } + } + name := pomPropertiesObject.ArtifactID + if name == "" { + name = pomProjectObject.ArtifactID + } + version := pomPropertiesObject.Version + if version == "" { + version = pomProjectObject.Version + } + return name, version +} + // discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given // parent package, returning all listed Java packages 
found for each pom // properties discovered and potentially updating the given parentPkg with new diff --git a/test/integration/regression_java_virtualpath_test.go b/test/integration/regression_java_virtualpath_test.go new file mode 100644 index 000000000..659e5d017 --- /dev/null +++ b/test/integration/regression_java_virtualpath_test.go @@ -0,0 +1,36 @@ +package integration + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +func TestWarCatalogedCorrectlyIfRenamed(t *testing.T) { + // install hudson-war@2.2.1 and renames the file to `/hudson.war` + sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope, nil) + + badPURL := "pkg:maven/hudson/hudson@2.2.1" + goodPURL := "pkg:maven/org.jvnet.hudson.main/hudson-war@2.2.1" + foundCorrectPackage := false + badVirtualPath := "/hudson.war:org.jvnet.hudson.main:hudson-war" + goodVirtualPath := "/hudson.war" + for _, p := range sbom.Artifacts.Packages.Sorted() { + if p.Type == pkg.JavaPkg && strings.Contains(p.Name, "hudson") { + assert.NotEqual(t, badPURL, p.PURL, "must not find bad purl %q", badPURL) + virtPath := "" + if meta, ok := p.Metadata.(pkg.JavaMetadata); ok { + virtPath = meta.VirtualPath + if p.PURL == goodPURL && virtPath == goodVirtualPath { + foundCorrectPackage = true + } + } + assert.NotEqual(t, badVirtualPath, virtPath, "must not find bad virtual path %q", badVirtualPath) + } + } + assert.True(t, foundCorrectPackage, "must find correct package, but did not") +} diff --git a/test/integration/test-fixtures/image-java-virtualpath-regression/Dockerfile b/test/integration/test-fixtures/image-java-virtualpath-regression/Dockerfile new file mode 100644 index 000000000..a8d70f16a --- /dev/null +++ b/test/integration/test-fixtures/image-java-virtualpath-regression/Dockerfile @@ -0,0 +1,7 @@ +FROM alpine:latest + +RUN wget 
https://repo1.maven.org/maven2/org/jvnet/hudson/main/hudson-war/2.2.1/hudson-war-2.2.1.war + +RUN mv hudson-war-2.2.1.war hudson.war + +