fix: nondeterministic Java archive cataloging and improve groupID (#4118)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2025-08-07 10:55:10 -04:00 committed by GitHub
parent d4d311155f
commit 8c6a2bcbb6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 78 additions and 17 deletions

View File

@ -1,15 +1,18 @@
package java package java
import ( import (
"cmp"
"context" "context"
"crypto" "crypto"
"fmt" "fmt"
"io" "io"
"iter"
"os" "os"
"path" "path"
"slices" "slices"
"strings" "strings"
"github.com/scylladb/go-set/strset"
"golang.org/x/exp/maps" "golang.org/x/exp/maps"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
@ -391,20 +394,32 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
// map of all the artifacts in the pom properties, in order to check exact match with the filename // map of all the artifacts in the pom properties, in order to check exact match with the filename
artifactsMap := make(map[string]bool) artifactsMap := strset.New()
for _, propertiesObj := range properties { for _, propertiesObj := range properties {
artifactsMap[propertiesObj.ArtifactID] = true artifactsMap.Add(propertiesObj.ArtifactID)
} }
parentPaths := maps.Keys(properties) for parentPath, propertiesObj := range sortedIter(properties) {
slices.Sort(parentPaths) // the logic for selecting the best name is as follows:
for _, parentPath := range parentPaths { // if we find an artifact id AND group id which are both contained in the filename
propertiesObj := properties[parentPath] // OR if we have an artifact id that exactly matches the filename, prefer this
// OTHERWISE track the first matching pom properties with a pom.xml
// FINALLY return the first matching pom properties
if artifactIDMatchesFilename(propertiesObj.ArtifactID, j.fileInfo.name, artifactsMap) { if artifactIDMatchesFilename(propertiesObj.ArtifactID, j.fileInfo.name, artifactsMap) {
pomProperties = propertiesObj if pomProperties.ArtifactID == "" { // keep the first match, or overwrite if we find more specific entries
pomProperties = propertiesObj
}
if proj, exists := projects[parentPath]; exists { if proj, exists := projects[parentPath]; exists {
parsedPom = proj if parsedPom == nil { // keep the first matching artifact if we don't find an exact match or groupid + artifact id match
break pomProperties = propertiesObj // set this, as it may not be the first entry found
parsedPom = proj
}
// if artifact ID is the entire filename or BOTH artifactID and groupID are contained in the artifact, prefer this match
if strings.Contains(j.fileInfo.name, propertiesObj.GroupID) || j.fileInfo.name == propertiesObj.ArtifactID {
pomProperties = propertiesObj // this is an exact match, use it
parsedPom = proj
break
}
} }
} }
} }
@ -429,12 +444,13 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro
return group, name, version, parsedPom return group, name, version, parsedPom
} }
func artifactIDMatchesFilename(artifactID, fileName string, artifactsMap map[string]bool) bool { // artifactIDMatchesFilename returns true if one starts with the other
func artifactIDMatchesFilename(artifactID, fileName string, artifactsMap *strset.Set) bool {
if artifactID == "" || fileName == "" { if artifactID == "" || fileName == "" {
return false return false
} }
// Ensure true is returned when filename matches the artifact ID, prevent random retrieval by checking prefix and suffix // Ensure true is returned when filename matches the artifact ID, prevent random retrieval by checking prefix and suffix
if _, exists := artifactsMap[fileName]; exists { if artifactsMap.Has(fileName) {
return artifactID == fileName return artifactID == fileName
} }
// Use fallback check with suffix and prefix if no POM properties file matches the exact artifact name // Use fallback check with suffix and prefix if no POM properties file matches the exact artifact name
@ -464,7 +480,7 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
return nil, err return nil, err
} }
for parentPath, propertiesObj := range properties { for parentPath, propertiesObj := range sortedIter(properties) {
var parsedPom *parsedPomProject var parsedPom *parsedPomProject
if proj, exists := projects[parentPath]; exists { if proj, exists := projects[parentPath]; exists {
parsedPom = proj parsedPom = proj
@ -546,7 +562,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener
var pkgs []pkg.Package var pkgs []pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship
for pathWithinArchive, archiveOpener := range openers { for pathWithinArchive, archiveOpener := range sortedIter(openers) {
nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg, parentPkg) nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg, parentPkg)
if err != nil { if err != nil {
log.WithFields("location", location.Path(), "error", err).Debug("unable to discover java packages from opener") log.WithFields("location", location.Path(), "error", err).Debug("unable to discover java packages from opener")
@ -604,7 +620,7 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
} }
propertiesByParentPath := make(map[string]pkg.JavaPomProperties) propertiesByParentPath := make(map[string]pkg.JavaPomProperties)
for filePath, fileContents := range contentsOfMavenPropertiesFiles { for filePath, fileContents := range sortedIter(contentsOfMavenPropertiesFiles) {
pomProperties, err := parsePomProperties(filePath, strings.NewReader(fileContents)) pomProperties, err := parsePomProperties(filePath, strings.NewReader(fileContents))
if err != nil { if err != nil {
log.WithFields("contents-path", filePath, "location", location.Path(), "error", err).Debug("failed to parse pom.properties") log.WithFields("contents-path", filePath, "location", location.Path(), "error", err).Debug("failed to parse pom.properties")
@ -633,7 +649,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP
} }
projectByParentPath := make(map[string]*parsedPomProject) projectByParentPath := make(map[string]*parsedPomProject)
for filePath, fileContents := range contentsOfMavenProjectFiles { for filePath, fileContents := range sortedIter(contentsOfMavenProjectFiles) {
// TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar // TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar
pom, err := maven.ParsePomXML(strings.NewReader(fileContents)) pom, err := maven.ParsePomXML(strings.NewReader(fileContents))
if err != nil { if err != nil {
@ -784,3 +800,15 @@ func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) {
parentPkg.Metadata = parentMetadata parentPkg.Metadata = parentMetadata
} }
} }
func sortedIter[K cmp.Ordered, V any](values map[K]V) iter.Seq2[K, V] {
return func(yield func(K, V) bool) {
keys := maps.Keys(values)
slices.Sort(keys)
for _, key := range keys {
if !yield(key, values[key]) {
return
}
}
}
}

View File

@ -1086,7 +1086,7 @@ func Test_artifactIDMatchesFilename(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, artifactIDMatchesFilename(tt.artifactID, tt.fileName, nil)) assert.Equal(t, tt.want, artifactIDMatchesFilename(tt.artifactID, tt.fileName, strset.New()))
}) })
} }
} }
@ -1382,6 +1382,10 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture: "multiple-matching-2.11.5", fixture: "multiple-matching-2.11.5",
expected: maven.NewID("org.multiple", "multiple-matching-1", "2.11.5"), expected: maven.NewID("org.multiple", "multiple-matching-1", "2.11.5"),
}, },
{
fixture: "org.multiple-thename",
expected: maven.NewID("org.multiple", "thename", "10.11.12"),
},
} }
for _, test := range tests { for _, test := range tests {

View File

@ -12,6 +12,7 @@ OPENSAML_CORE = opensaml-core-3.4.6
API_ALL_SOURCES = api-all-2.0.0-sources API_ALL_SOURCES = api-all-2.0.0-sources
SPRING_INSTRUMENTATION = spring-instrumentation-4.3.0-1.0 SPRING_INSTRUMENTATION = spring-instrumentation-4.3.0-1.0
MULTIPLE_MATCHING = multiple-matching-2.11.5 MULTIPLE_MATCHING = multiple-matching-2.11.5
ORG_MULTIPLE_THENAME = org.multiple-thename
.DEFAULT_GOAL := fixtures .DEFAULT_GOAL := fixtures
@ -48,6 +49,10 @@ $(CACHE_DIR)/$(MULTIPLE_MATCHING).jar:
mkdir -p $(CACHE_DIR) mkdir -p $(CACHE_DIR)
cd $(MULTIPLE_MATCHING) && zip -r $(CACHE_PATH)/$(MULTIPLE_MATCHING).jar . cd $(MULTIPLE_MATCHING) && zip -r $(CACHE_PATH)/$(MULTIPLE_MATCHING).jar .
# Build the $(ORG_MULTIPLE_THENAME) fixture jar: ensure the cache directory
# exists, then recursively zip the contents of the fixture directory into a
# .jar named after the fixture.
# NOTE(review): the archive is written under $(CACHE_PATH) while the target is
# declared under $(CACHE_DIR); presumably both refer to the same directory — confirm.
$(CACHE_DIR)/$(ORG_MULTIPLE_THENAME).jar:
mkdir -p $(CACHE_DIR)
cd $(ORG_MULTIPLE_THENAME) && zip -r $(CACHE_PATH)/$(ORG_MULTIPLE_THENAME).jar .
# Jenkins plugins typically do not have the version included in the archive name, # Jenkins plugins typically do not have the version included in the archive name,
# so it is important to not include it in the generated test fixture # so it is important to not include it in the generated test fixture
$(CACHE_DIR)/gradle.hpi: $(CACHE_DIR)/gradle.hpi:

View File

@ -0,0 +1,2 @@
Manifest-Version: 1.0
Created-By: Multi

View File

@ -0,0 +1,3 @@
version=10.11.12
groupId=com.multiple
artifactId=thename

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.multiple</groupId>
<artifactId>thename</artifactId>
<version>10.11.12</version>
</project>

View File

@ -0,0 +1,3 @@
version=10.11.12
groupId=org.multiple
artifactId=thename

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.multiple</groupId>
<artifactId>thename</artifactId>
<version>10.11.12</version>
</project>