feat: Pom xml only archive parser (#4272)

fix: identifying jar files with a single pom.xml and no pom.properties file
fix: test works with pom.xml being found, used and reported in metadata
Signed-off-by: Doug Clarke <douglas.clarke@oracle.com>

test: check for current project path and use
Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>
---------
Signed-off-by: Doug Clarke <douglas.clarke@oracle.com>
Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>
Co-authored-by: Christopher Phillips <spiffcs@users.noreply.github.com>
This commit is contained in:
Doug Clarke 2025-10-13 15:59:08 -04:00 committed by GitHub
parent 2d1ada1d00
commit 760bd9a50a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 215 additions and 30 deletions

View File

@ -257,10 +257,14 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
return nil, err
}
name, version, lics, err := j.discoverNameVersionLicense(ctx, manifest)
name, version, lics, parsedPom, err := j.discoverNameVersionLicense(ctx, manifest)
if err != nil {
return nil, err
}
var pkgPomProject *pkg.JavaPomProject
if parsedPom != nil {
pkgPomProject = newPomProject(ctx, nil, parsedPom.path, parsedPom.project)
}
return &pkg.Package{
// TODO: maybe select name should just have a pom properties in it?
@ -275,12 +279,13 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
Metadata: pkg.JavaArchive{
VirtualPath: j.location.Path(),
Manifest: manifest,
PomProject: pkgPomProject,
ArchiveDigests: digests,
},
}, nil
}
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, error) {
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject, error) {
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
lics := pkg.NewLicensesFromLocationWithContext(ctx, j.location, selectLicenses(manifest)...)
@ -302,7 +307,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
if len(lics) == 0 {
fileLicenses, err := j.getLicenseFromFileInArchive(ctx)
if err != nil {
return "", "", nil, err
return "", "", nil, parsedPom, err
}
if fileLicenses != nil {
lics = append(lics, fileLicenses...)
@ -317,7 +322,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
}
return artifactID, version, lics, nil
return artifactID, version, lics, parsedPom, nil
}
// findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info
@ -387,43 +392,93 @@ type parsedPomProject struct {
// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
var pomProperties pkg.JavaPomProperties
// Find the pom.properties/pom.xml if the names seem like a plausible match
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
// map of all the artifacts in the pom properties, in order to check for an exact match with the filename
artifactsMap := j.buildArtifactsMap(properties)
pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
parsedPom = j.handleSinglePomXML(properties, projects, parsedPom)
return j.resolveIdentity(ctx, pomProperties, parsedPom)
}
// buildArtifactsMap collects the ArtifactID of every given pom.properties
// entry into a string set. The map keys are not used, only the values.
func (j *archiveParser) buildArtifactsMap(properties map[string]pkg.JavaPomProperties) *strset.Set {
	ids := strset.New()
	for key := range properties {
		ids.Add(properties[key].ArtifactID)
	}
	return ids
}
func (j *archiveParser) findBestPomMatch(properties map[string]pkg.JavaPomProperties,
projects map[string]*parsedPomProject, artifactsMap *strset.Set) (pkg.JavaPomProperties, *parsedPomProject) {
var pomProperties pkg.JavaPomProperties
var parsedPom *parsedPomProject
for parentPath, propertiesObj := range sortedIter(properties) {
// the logic for selecting the best name is as follows:
// if we find an artifact id AND group id which are both contained in the filename
// OR if we have an artifact id that exactly matches the filename, prefer this
// OTHERWISE track the first matching pom properties with a pom.xml
// FINALLY return the first matching pom properties
if artifactIDMatchesFilename(propertiesObj.ArtifactID, j.fileInfo.name, artifactsMap) {
if pomProperties.ArtifactID == "" { // keep the first match, or overwrite if we find more specific entries
pomProperties = propertiesObj
}
if proj, exists := projects[parentPath]; exists {
if parsedPom == nil { // keep the first matching artifact if we don't find an exact match or groupid + artfiact id match
pomProperties = propertiesObj // set this, as it may not be the first entry found
parsedPom = proj
}
// if artifact ID is the entire filename or BOTH artifactID and groupID are contained in the artifact, prefer this match
if strings.Contains(j.fileInfo.name, propertiesObj.GroupID) || j.fileInfo.name == propertiesObj.ArtifactID {
pomProperties = propertiesObj // this is an exact match, use it
parsedPom = proj
break
}
}
if !artifactIDMatchesFilename(propertiesObj.ArtifactID, j.fileInfo.name, artifactsMap) {
continue
}
pomProperties, parsedPom = j.updateMatchIfBetter(pomProperties, parsedPom, propertiesObj, parentPath, projects)
if j.isExactMatch(propertiesObj, parsedPom) {
break
}
}
return pomProperties, parsedPom
}
// updateMatchIfBetter compares the currently selected pom.properties/pom.xml
// pair against a new candidate and returns whichever pair should be kept.
//
// currentProps and currentPom are the selection so far (an empty ArtifactID in
// currentProps means nothing has been selected yet). newProps is the candidate
// pom.properties entry, and parentPath is the key used to look up a pom.xml
// for that candidate in projects.
//
// Rules, applied in order:
//  1. Nothing selected yet: take the candidate, with its pom.xml when one
//     exists under parentPath; otherwise keep whatever pom was already held.
//  2. Candidate has no pom.xml: keep the current selection.
//  3. Current selection has no pom.xml: take the candidate and its pom.xml.
//  4. Candidate is an exact match per isExactMatch: take the candidate.
//  5. Otherwise keep the current selection.
func (j *archiveParser) updateMatchIfBetter(currentProps pkg.JavaPomProperties, currentPom *parsedPomProject,
	newProps pkg.JavaPomProperties, parentPath string, projects map[string]*parsedPomProject) (pkg.JavaPomProperties, *parsedPomProject) {
	candidatePom, found := projects[parentPath]

	switch {
	case currentProps.ArtifactID == "" && found:
		// first match, and it comes with a pom.xml
		return newProps, candidatePom
	case currentProps.ArtifactID == "":
		// first match, but no pom.xml alongside it
		return newProps, currentPom
	case !found:
		// a later candidate without a pom.xml never displaces the selection
		return currentProps, currentPom
	case currentPom == nil:
		// first candidate that actually has a pom.xml
		return newProps, candidatePom
	case j.isExactMatch(newProps, candidatePom):
		// exact matches win over earlier, looser matches
		return newProps, candidatePom
	default:
		return currentProps, currentPom
	}
}
// isExactMatch reports whether props identifies the archive precisely enough
// to be preferred over other candidates. It is only ever true when a parsed
// pom.xml is supplied (pom != nil), and then requires one of:
//   - the archive's file name equals props.ArtifactID, or
//   - props.GroupID is non-empty and is contained in the archive's file name.
func (j *archiveParser) isExactMatch(props pkg.JavaPomProperties, pom *parsedPomProject) bool {
	if pom == nil {
		return false
	}
	if j.fileInfo.name == props.ArtifactID {
		return true
	}
	// strings.Contains reports true for an empty substring, so without this
	// guard a pom.properties entry lacking a groupId would count as an exact
	// match for every file name.
	return props.GroupID != "" && strings.Contains(j.fileInfo.name, props.GroupID)
}
// handleSinglePomXML covers the case where there are no pom.properties
// entries at all and exactly one parsed pom.xml: that single project is
// returned. In every other case currentPom is returned unchanged.
func (j *archiveParser) handleSinglePomXML(properties map[string]pkg.JavaPomProperties,
	projects map[string]*parsedPomProject, currentPom *parsedPomProject) *parsedPomProject {
	if len(properties) != 0 || len(projects) != 1 {
		return currentPom
	}
	// the map holds exactly one entry; ranging is the way to pull it out
	// without knowing its key
	for _, only := range projects {
		return only
	}
	return currentPom
}
func (j *archiveParser) resolveIdentity(ctx context.Context, pomProperties pkg.JavaPomProperties,
parsedPom *parsedPomProject) (group, name, version string, pom *parsedPomProject) {
group = pomProperties.GroupID
name = pomProperties.ArtifactID
version = pomProperties.Version

View File

@ -79,9 +79,12 @@ func TestSearchMavenForLicenses(t *testing.T) {
ReadCloser: fixture,
}, tc.detectNested, tc.config)
defer cleanupFn()
require.NoError(t, err)
// assert licenses are discovered from upstream
_, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(context.Background())
require.NotNil(t, parsedPom, "expected to find pom information in the fixture")
require.NotNil(t, parsedPom.project, "expected parsedPom to have a project")
resolvedLicenses, _ := ap.maven.ResolveLicenses(context.Background(), parsedPom.project)
assert.Equal(t, tc.expectedLicenses, toPkgLicenses(ctx, nil, resolvedLicenses))
})
@ -148,10 +151,23 @@ func TestParseJar(t *testing.T) {
},
PomProperties: &pkg.JavaPomProperties{
Path: "META-INF/maven/io.jenkins.plugins/example-jenkins-plugin/pom.properties",
Name: "",
GroupID: "io.jenkins.plugins",
ArtifactID: "example-jenkins-plugin",
Version: "1.0-SNAPSHOT",
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/io.jenkins.plugins/example-jenkins-plugin/pom.xml",
Name: "Example Jenkins Plugin",
GroupID: "io.jenkins.plugins",
ArtifactID: "example-jenkins-plugin",
Version: "1.0-SNAPSHOT",
Parent: &pkg.JavaPomParent{
GroupID: "org.jenkins-ci.plugins",
ArtifactID: "plugin",
Version: "4.46",
},
},
},
},
},
@ -189,6 +205,14 @@ func TestParseJar(t *testing.T) {
},
},
},
// PomProject: &pkg.JavaPomProject{
// Path: "META-INF/maven/io.jenkins.plugins/example-jenkins-plugin/pom.xml",
// Parent: &pkg.JavaPomParent{GroupID: "org.jenkins-ci.plugins", ArtifactID: "plugin", Version: "4.46"},
// GroupID: "io.jenkins.plugins",
// ArtifactID: "example-jenkins-plugin",
// Version: "1.0-SNAPSHOT",
// Name: "Example Jenkins Plugin",
// },
},
},
"joda-time": {
@ -286,6 +310,12 @@ func TestParseJar(t *testing.T) {
ArtifactID: "example-java-app-maven",
Version: "0.1.0",
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/org.anchore/example-java-app-maven/pom.xml",
GroupID: "org.anchore",
ArtifactID: "example-java-app-maven",
Version: "0.1.0",
},
},
},
"joda-time": {
@ -1127,6 +1157,13 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
GroupID: "org.apache.directory.api",
ArtifactID: "api-all",
Version: "2.0.0",
}, PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/org.apache.directory.api/api-all/pom.xml",
ArtifactID: "api-all",
GroupID: "org.apache.directory.api",
Version: "2.0.0",
Name: "Apache Directory API All",
Parent: &pkg.JavaPomParent{GroupID: "org.apache.directory.api", ArtifactID: "api-parent", Version: "2.0.0"},
},
},
}
@ -1163,6 +1200,46 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
},
}
micronautAop := pkg.Package{
Name: "micronaut-aop",
Version: "4.9.11",
PURL: "pkg:maven/io.micronaut/micronaut-aop@4.9.11",
Locations: file.NewLocationSet(file.NewLocation("test-fixtures/jar-metadata/cache/micronaut-aop-4.9.11.jar")),
Type: pkg.JavaPkg,
Language: pkg.Java,
Metadata: pkg.JavaArchive{
VirtualPath: "test-fixtures/jar-metadata/cache/micronaut-aop-4.9.11.jar",
Manifest: &pkg.JavaManifest{
Main: []pkg.KeyValue{
{
Key: "Manifest-Version",
Value: "1.0",
},
{
Key: "Automatic-Module-Name",
Value: "io.micronaut.micronaut_aop",
},
{
Key: "Implementation-Version",
Value: "4.9.11",
},
{
Key: "Implementation-Title",
Value: "Micronaut Core",
},
},
}, PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/io.micronaut/micronaut-aop/pom.xml",
ArtifactID: "micronaut-aop",
GroupID: "io.micronaut",
Version: "4.9.11",
Name: "Micronaut Core",
Description: "Core components supporting the Micronaut Framework",
URL: "https://micronaut.io",
},
},
}
tests := []struct {
name string
fixtureName string
@ -1220,6 +1297,16 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
{Key: "Specification-Version", Value: "2.15.2"},
},
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/com.fasterxml.jackson.core/jackson-core/pom.xml",
ArtifactID: "jackson-core",
GroupID: "com.fasterxml.jackson.core",
Version: "2.15.2",
Name: "Jackson-core",
Description: "Core Jackson processing abstractions (aka Streaming API), implementation for JSON",
URL: "https://github.com/FasterXML/jackson-core",
Parent: &pkg.JavaPomParent{GroupID: "com.fasterxml.jackson", ArtifactID: "jackson-base", Version: "2.15.2"},
},
// not under test
//ArchiveDigests: []file.Digest{{Algorithm: "sha1", Value: "d8bc1d9c428c96fe447e2c429fc4304d141024df"}},
},
@ -1275,6 +1362,16 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
{Key: "Specification-Version", Value: "2.15.2"},
},
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/com.fasterxml.jackson.core/jackson-core/pom.xml",
ArtifactID: "jackson-core",
GroupID: "com.fasterxml.jackson.core",
Version: "2.15.2",
Name: "Jackson-core",
Description: "Core Jackson processing abstractions (aka Streaming API), implementation for JSON",
URL: "https://github.com/FasterXML/jackson-core",
Parent: &pkg.JavaPomParent{GroupID: "com.fasterxml.jackson", ArtifactID: "jackson-base", Version: "2.15.2"},
},
// not under test
//ArchiveDigests: []file.Digest{{Algorithm: "sha1", Value: "abd3e329270fc54a2acaceb45420fd5710ecefd5"}},
},
@ -1341,6 +1438,14 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
},
},
},
{
name: "micronaut-aop",
fixtureName: "micronaut-aop-4.9.11",
fileExtension: "jar",
expectedPkgs: []pkg.Package{
micronautAop,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

View File

@ -13,6 +13,7 @@ API_ALL_SOURCES = api-all-2.0.0-sources
SPRING_INSTRUMENTATION = spring-instrumentation-4.3.0-1.0
MULTIPLE_MATCHING = multiple-matching-2.11.5
ORG_MULTIPLE_THENAME = org.multiple-thename
MICRONAUT_AOP = micronaut-aop-4.9.11
.DEFAULT_GOAL := fixtures
@ -23,7 +24,7 @@ fixtures: $(CACHE_DIR)
# requirement 2: 'fingerprint' goal to determine if the fixture input that indicates any existing cache should be busted
fingerprint: $(FINGERPRINT_FILE)
$(CACHE_DIR): $(CACHE_DIR)/$(JACKSON_CORE).jar $(CACHE_DIR)/$(SBT_JACKSON_CORE).jar $(CACHE_DIR)/$(OPENSAML_CORE).jar $(CACHE_DIR)/$(API_ALL_SOURCES).jar $(CACHE_DIR)/$(SPRING_INSTRUMENTATION).jar $(CACHE_DIR)/$(MULTIPLE_MATCHING).jar
$(CACHE_DIR): $(CACHE_DIR)/$(JACKSON_CORE).jar $(CACHE_DIR)/$(SBT_JACKSON_CORE).jar $(CACHE_DIR)/$(OPENSAML_CORE).jar $(CACHE_DIR)/$(API_ALL_SOURCES).jar $(CACHE_DIR)/$(SPRING_INSTRUMENTATION).jar $(CACHE_DIR)/$(MULTIPLE_MATCHING).jar $(CACHE_DIR)/$(MICRONAUT_AOP).jar
$(CACHE_DIR)/$(JACKSON_CORE).jar:
mkdir -p $(CACHE_DIR)
@ -53,6 +54,10 @@ $(CACHE_DIR)/$(ORG_MULTIPLE_THENAME).jar:
mkdir -p $(CACHE_DIR)
cd $(ORG_MULTIPLE_THENAME) && zip -r $(CACHE_PATH)/$(ORG_MULTIPLE_THENAME).jar .
$(CACHE_DIR)/$(MICRONAUT_AOP).jar:
mkdir -p $(CACHE_DIR)
cd $(MICRONAUT_AOP) && zip -r $(CACHE_PATH)/$(MICRONAUT_AOP).jar .
# Jenkins plugins typically do not have the version included in the archive name,
# so it is important to not include it in the generated test fixture
$(CACHE_DIR)/gradle.hpi:

View File

@ -0,0 +1,5 @@
Manifest-Version: 1.0
Automatic-Module-Name: io.micronaut.micronaut_aop
Implementation-Version: 4.9.11
Implementation-Title: Micronaut Core

View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<!-- This module was also published with a richer model, Gradle metadata, -->
<!-- which should be used instead. Do not delete the following line which -->
<!-- is to indicate to Gradle or any Gradle module metadata file consumer -->
<!-- that they should prefer consuming it instead. -->
<!-- do_not_remove: published-with-gradle-metadata -->
<modelVersion>4.0.0</modelVersion>
<groupId>io.micronaut</groupId>
<artifactId>micronaut-aop</artifactId>
<version>4.9.11</version>
<name>Micronaut Core</name>
<description>Core components supporting the Micronaut Framework</description>
<url>https://micronaut.io</url>
</project>