feat: add support for detecting packages in JARs

Support for detecting the packages present in a JAR has been added. It
can be enabled via the `DetectContainedPackages` config flag.

Signed-off-by: Patrick Pichler <git@patrickpichler.dev>
This commit is contained in:
Patrick Pichler 2025-09-04 15:36:05 +02:00 committed by Patrick Pichler
parent f12788da78
commit 65e58ba33d
13 changed files with 4545 additions and 7 deletions

View File

@ -3,7 +3,7 @@ package internal
const ( const (
// JSONSchemaVersion is the current schema version output by the JSON encoder // JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.1.0" JSONSchemaVersion = "16.1.1"
// Changelog // Changelog
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field). // 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).

View File

@ -29,6 +29,9 @@ func TestGlobMatch(t *testing.T) {
{"a*a*a*a*b", strings.Repeat("a", 100), false}, {"a*a*a*a*b", strings.Repeat("a", 100), false},
{"*x", "xxx", true}, {"*x", "xxx", true},
{"/home/place/**", "/home/place/a/thing", true}, {"/home/place/**", "/home/place/a/thing", true},
{"/org/test/**/*.class", "/org/test/system/files/Hello.class", true},
{"**/*.class", "/org/test/system/files/Hello.class", true},
{"**/*.class", "Hello.class", false},
} }
for _, test := range tests { for _, test := range tests {

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.1.0/document", "$id": "anchore.io/schema/syft/json/16.1.1/document",
"$ref": "#/$defs/Document", "$ref": "#/$defs/Document",
"$defs": { "$defs": {
"AlpmDbEntry": { "AlpmDbEntry": {
@ -1648,11 +1648,19 @@
}, },
"type": "array", "type": "array",
"description": "ArchiveDigests is cryptographic hashes of the archive file" "description": "ArchiveDigests is cryptographic hashes of the archive file"
},
"containedPackages": {
"items": {
"type": "string"
},
"type": "array",
"description": "ContainedPackages is a list of all package names contained in the jar"
} }
}, },
"type": "object", "type": "object",
"required": [ "required": [
"virtualPath" "virtualPath",
"containedPackages"
], ],
"description": "JavaArchive encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship." "description": "JavaArchive encapsulates all Java ecosystem metadata for a package as well as an (optional) parent relationship."
}, },

View File

@ -261,6 +261,12 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project) pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project)
} }
var containedPackages []string
if j.cfg.DetectContainedPackages {
containedPackages = j.discoverContainedPackages()
}
return &pkg.Package{ return &pkg.Package{
// TODO: maybe select name should just have a pom properties in it? // TODO: maybe select name should just have a pom properties in it?
Name: name, Name: name,
@ -272,10 +278,11 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
), ),
Type: j.fileInfo.pkgType(), Type: j.fileInfo.pkgType(),
Metadata: pkg.JavaArchive{ Metadata: pkg.JavaArchive{
VirtualPath: j.location.Path(), VirtualPath: j.location.Path(),
Manifest: manifest, Manifest: manifest,
PomProject: pkgPomProject, PomProject: pkgPomProject,
ArchiveDigests: digests, ArchiveDigests: digests,
ContainedPackages: containedPackages,
}, },
}, nil }, nil
} }
@ -880,3 +887,63 @@ func sortedIter[K cmp.Ordered, V any](values map[K]V) iter.Seq2[K, V] {
} }
} }
} }
// isValidMultiReleaseVersion reports whether s is a syntactically valid
// multi-release version directory name as specified by
// https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#multi-release-jar-files
//
// A name is accepted when it is a non-empty run of ASCII digits that does not
// start with '0'. The check is purely syntactic: the numeric value is not
// compared against any minimum release.
func isValidMultiReleaseVersion(s string) bool {
	if s == "" {
		return false
	}

	// Fast path for the most common single-digit version.
	if s == "9" {
		return true
	}

	// A leading 0 (or any non-digit first character) is not allowed.
	if s[0] < '1' || s[0] > '9' {
		return false
	}

	// Only digits are allowed: IndexFunc yields -1 when no rune is a non-digit,
	// so the name is valid exactly when nothing was found.
	return strings.IndexFunc(s, func(r rune) bool {
		return r < '0' || r > '9'
	}) == -1
}
// discoverContainedPackages collects the dot-separated Java package names of
// every entry in the archive's file manifest that matches "**/*.class".
//
// Entries under META-INF/versions/<N>/ (where <N> passes
// isValidMultiReleaseVersion) have that prefix removed before the package name
// is derived. Classes without a directory component, and anything remaining
// under META-INF or WEB-INF, are skipped. The result is de-duplicated and
// sorted; nil is returned when no package was found.
func (j *archiveParser) discoverContainedPackages() []string {
	found := strset.New()

	for _, classPath := range j.fileManifest.GlobMatch(false, "**/*.class") {
		segments := strings.Split(classPath, "/")

		// Drop the multi-release prefix so versioned classes map onto the same
		// package names as their unversioned counterparts.
		isVersioned := len(segments) > 3 &&
			segments[0] == "META-INF" &&
			segments[1] == "versions" &&
			isValidMultiReleaseVersion(segments[2])
		if isVersioned {
			segments = segments[3:]
		}

		// A lone file name has no directory part, i.e. it lives in the unnamed package.
		if len(segments) <= 1 {
			continue
		}

		// Whatever is still rooted at META-INF or WEB-INF is not treated as a package.
		switch segments[0] {
		case "META-INF", "WEB-INF":
			continue
		}

		dirs := segments[:len(segments)-1]
		found.Add(strings.Join(dirs, "."))
	}

	if found.Size() == 0 {
		return nil
	}

	names := found.List()
	slices.Sort(names)

	return names
}

View File

@ -204,6 +204,89 @@ func TestParseJar(t *testing.T) {
}, },
}, },
}, },
ContainedPackages: []string{
"hello",
"net.bytebuddy",
"net.bytebuddy.agent.builder",
"net.bytebuddy.asm",
"net.bytebuddy.build",
"net.bytebuddy.description",
"net.bytebuddy.description.annotation",
"net.bytebuddy.description.enumeration",
"net.bytebuddy.description.field",
"net.bytebuddy.description.method",
"net.bytebuddy.description.modifier",
"net.bytebuddy.description.type",
"net.bytebuddy.dynamic",
"net.bytebuddy.dynamic.loading",
"net.bytebuddy.dynamic.scaffold",
"net.bytebuddy.dynamic.scaffold.inline",
"net.bytebuddy.dynamic.scaffold.subclass",
"net.bytebuddy.implementation",
"net.bytebuddy.implementation.attribute",
"net.bytebuddy.implementation.auxiliary",
"net.bytebuddy.implementation.bind",
"net.bytebuddy.implementation.bind.annotation",
"net.bytebuddy.implementation.bytecode",
"net.bytebuddy.implementation.bytecode.assign",
"net.bytebuddy.implementation.bytecode.assign.primitive",
"net.bytebuddy.implementation.bytecode.assign.reference",
"net.bytebuddy.implementation.bytecode.collection",
"net.bytebuddy.implementation.bytecode.constant",
"net.bytebuddy.implementation.bytecode.member",
"net.bytebuddy.jar.asm",
"net.bytebuddy.jar.asm.commons",
"net.bytebuddy.jar.asm.signature",
"net.bytebuddy.matcher",
"net.bytebuddy.pool",
"net.bytebuddy.utility",
"net.bytebuddy.utility.dispatcher",
"net.bytebuddy.utility.nullability",
"net.bytebuddy.utility.privilege",
"net.bytebuddy.utility.visitor",
"org.joda.time",
"org.joda.time.base",
"org.joda.time.chrono",
"org.joda.time.convert",
"org.joda.time.field",
"org.joda.time.format",
"org.joda.time.tz",
},
},
},
"byte-buddy": {
Name: "byte-buddy",
Version: "1.17.5",
PURL: "pkg:maven/net.bytebuddy/byte-buddy@1.17.5",
Licenses: pkg.NewLicenseSet(),
Language: pkg.Java,
Type: pkg.JavaPkg,
Metadata: pkg.JavaArchive{
// ensure that nested packages with different names than that of the parent are appended as
// a suffix on the virtual path
VirtualPath: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar:net.bytebuddy:byte-buddy",
PomProperties: &pkg.JavaPomProperties{
Path: "META-INF/maven/net.bytebuddy/byte-buddy/pom.properties",
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy",
Version: "1.17.5",
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/net.bytebuddy/byte-buddy/pom.xml",
Parent: &pkg.JavaPomParent{
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy-parent",
Version: "1.17.5",
},
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy",
Version: "1.17.5",
Name: "Byte Buddy (without dependencies)",
Description: "Byte Buddy is a Java library for creating Java classes at run time. " +
"This artifact is a build of Byte Buddy with all ASM dependencies repackaged " +
"into its own name space.",
URL: "",
},
// PomProject: &pkg.JavaPomProject{ // PomProject: &pkg.JavaPomProject{
// Path: "META-INF/maven/io.jenkins.plugins/example-jenkins-plugin/pom.xml", // Path: "META-INF/maven/io.jenkins.plugins/example-jenkins-plugin/pom.xml",
// Parent: &pkg.JavaPomParent{GroupID: "org.jenkins-ci.plugins", ArtifactID: "plugin", Version: "4.46"}, // Parent: &pkg.JavaPomParent{GroupID: "org.jenkins-ci.plugins", ArtifactID: "plugin", Version: "4.46"},
@ -315,6 +398,89 @@ func TestParseJar(t *testing.T) {
ArtifactID: "example-java-app-maven", ArtifactID: "example-java-app-maven",
Version: "0.1.0", Version: "0.1.0",
}, },
ContainedPackages: []string{
"hello",
"net.bytebuddy",
"net.bytebuddy.agent.builder",
"net.bytebuddy.asm",
"net.bytebuddy.build",
"net.bytebuddy.description",
"net.bytebuddy.description.annotation",
"net.bytebuddy.description.enumeration",
"net.bytebuddy.description.field",
"net.bytebuddy.description.method",
"net.bytebuddy.description.modifier",
"net.bytebuddy.description.type",
"net.bytebuddy.dynamic",
"net.bytebuddy.dynamic.loading",
"net.bytebuddy.dynamic.scaffold",
"net.bytebuddy.dynamic.scaffold.inline",
"net.bytebuddy.dynamic.scaffold.subclass",
"net.bytebuddy.implementation",
"net.bytebuddy.implementation.attribute",
"net.bytebuddy.implementation.auxiliary",
"net.bytebuddy.implementation.bind",
"net.bytebuddy.implementation.bind.annotation",
"net.bytebuddy.implementation.bytecode",
"net.bytebuddy.implementation.bytecode.assign",
"net.bytebuddy.implementation.bytecode.assign.primitive",
"net.bytebuddy.implementation.bytecode.assign.reference",
"net.bytebuddy.implementation.bytecode.collection",
"net.bytebuddy.implementation.bytecode.constant",
"net.bytebuddy.implementation.bytecode.member",
"net.bytebuddy.jar.asm",
"net.bytebuddy.jar.asm.commons",
"net.bytebuddy.jar.asm.signature",
"net.bytebuddy.matcher",
"net.bytebuddy.pool",
"net.bytebuddy.utility",
"net.bytebuddy.utility.dispatcher",
"net.bytebuddy.utility.nullability",
"net.bytebuddy.utility.privilege",
"net.bytebuddy.utility.visitor",
"org.joda.time",
"org.joda.time.base",
"org.joda.time.chrono",
"org.joda.time.convert",
"org.joda.time.field",
"org.joda.time.format",
"org.joda.time.tz",
},
},
},
"byte-buddy": {
Name: "byte-buddy",
Version: "1.17.5",
PURL: "pkg:maven/net.bytebuddy/byte-buddy@1.17.5",
Licenses: pkg.NewLicenseSet(),
Language: pkg.Java,
Type: pkg.JavaPkg,
Metadata: pkg.JavaArchive{
// ensure that nested packages with different names than that of the parent are appended as
// a suffix on the virtual path
VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar:net.bytebuddy:byte-buddy",
PomProperties: &pkg.JavaPomProperties{
Path: "META-INF/maven/net.bytebuddy/byte-buddy/pom.properties",
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy",
Version: "1.17.5",
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/net.bytebuddy/byte-buddy/pom.xml",
Parent: &pkg.JavaPomParent{
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy-parent",
Version: "1.17.5",
},
GroupID: "net.bytebuddy",
ArtifactID: "byte-buddy",
Version: "1.17.5",
Name: "Byte Buddy (without dependencies)",
Description: "Byte Buddy is a Java library for creating Java classes at run time. " +
"This artifact is a build of Byte Buddy with all ASM dependencies repackaged " +
"into its own name space.",
URL: "",
},
}, },
}, },
"joda-time": { "joda-time": {
@ -371,6 +537,7 @@ func TestParseJar(t *testing.T) {
cfg := ArchiveCatalogerConfig{ cfg := ArchiveCatalogerConfig{
UseNetwork: false, UseNetwork: false,
UseMavenLocalRepository: false, UseMavenLocalRepository: false,
DetectContainedPackages: true,
} }
parser, cleanupFn, err := newJavaArchiveParser(context.Background(), parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{ file.LocationReadCloser{

View File

@ -33,6 +33,10 @@ type ArchiveCatalogerConfig struct {
// ResolveTransitiveDependencies enables resolving transitive dependencies for java packages found within archives. // ResolveTransitiveDependencies enables resolving transitive dependencies for java packages found within archives.
// app-config: java.resolve-transitive-dependencies // app-config: java.resolve-transitive-dependencies
ResolveTransitiveDependencies bool `yaml:"resolve-transitive-dependencies" json:"resolve-transitive-dependencies" mapstructure:"resolve-transitive-dependencies"` ResolveTransitiveDependencies bool `yaml:"resolve-transitive-dependencies" json:"resolve-transitive-dependencies" mapstructure:"resolve-transitive-dependencies"`
// DetectContainedPackages enables collecting all package names contained in a jar.
// app-config: java.detect-contained-packages
DetectContainedPackages bool `yaml:"detect-contained-packages" json:"detect-contained-packages" mapstructure:"detect-contained-packages"`
} }
func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig { func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig {
@ -45,6 +49,7 @@ func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig {
MavenBaseURL: strings.Join(mavenCfg.Repositories, ","), MavenBaseURL: strings.Join(mavenCfg.Repositories, ","),
MaxParentRecursiveDepth: mavenCfg.MaxParentRecursiveDepth, MaxParentRecursiveDepth: mavenCfg.MaxParentRecursiveDepth,
ResolveTransitiveDependencies: false, ResolveTransitiveDependencies: false,
DetectContainedPackages: false,
} }
} }
@ -81,6 +86,11 @@ func (j ArchiveCatalogerConfig) WithArchiveTraversal(search cataloging.ArchiveSe
return j return j
} }
// WithDetectContainedPackages returns a copy of the config with
// DetectContainedPackages set to the given value. The receiver is passed by
// value, so the caller's config is left untouched.
func (j ArchiveCatalogerConfig) WithDetectContainedPackages(detectContainedPackages bool) ArchiveCatalogerConfig {
	updated := j
	updated.DetectContainedPackages = detectContainedPackages

	return updated
}
func (j ArchiveCatalogerConfig) mavenConfig() maven.Config { func (j ArchiveCatalogerConfig) mavenConfig() maven.Config {
return maven.Config{ return maven.Config{
UseNetwork: j.UseNetwork, UseNetwork: j.UseNetwork,

View File

@ -23,6 +23,7 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
expected: []string{ expected: []string{
"example-java-app-maven", "example-java-app-maven",
"joda-time", "joda-time",
"byte-buddy",
}, },
}, },
{ {
@ -30,6 +31,7 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
expected: []string{ expected: []string{
"example-java-app-maven", "example-java-app-maven",
"joda-time", "joda-time",
"byte-buddy",
}, },
}, },
} }

View File

@ -21,6 +21,7 @@ targetCompatibility = 1.8
dependencies { dependencies {
implementation "joda-time:joda-time:2.2" implementation "joda-time:joda-time:2.2"
implementation "net.bytebuddy:byte-buddy:1.17.5"
testImplementation "junit:junit:4.12" testImplementation "junit:junit:4.12"
} }
// end::dependencies[] // end::dependencies[]
@ -37,6 +38,8 @@ jar {
from { from {
configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) } configurations.runtimeClasspath.collect { it.isDirectory() ? it : zipTree(it) }
} }
duplicatesStrategy = DuplicatesStrategy.EXCLUDE
} }
// end::jar[] // end::jar[]

View File

@ -3,5 +3,6 @@
# This file is expected to be part of source control. # This file is expected to be part of source control.
joda-time:joda-time:2.2=compileClasspath,runtimeClasspath,testCompileClasspath,testRuntimeClasspath joda-time:joda-time:2.2=compileClasspath,runtimeClasspath,testCompileClasspath,testRuntimeClasspath
junit:junit:4.12=testCompileClasspath,testRuntimeClasspath junit:junit:4.12=testCompileClasspath,testRuntimeClasspath
net.bytebuddy:byte-buddy:1.17.5=compileClasspath,runtimeClasspath,testCompileClasspath,testRuntimeClasspath
org.hamcrest:hamcrest-core:1.3=testCompileClasspath,testRuntimeClasspath org.hamcrest:hamcrest-core:1.3=testCompileClasspath,testRuntimeClasspath
empty=annotationProcessor,testAnnotationProcessor empty=annotationProcessor,testAnnotationProcessor

View File

@ -14,6 +14,13 @@
</properties> </properties>
<dependencies> <dependencies>
<!-- tag::bytebuddy[] -->
<dependency>
<groupId>net.bytebuddy</groupId>
<artifactId>byte-buddy</artifactId>
<version>1.17.5</version>
</dependency>
<!-- end::bytebuddy[] -->
<!-- tag::joda[] --> <!-- tag::joda[] -->
<dependency> <dependency>
<groupId>joda-time</groupId> <groupId>joda-time</groupId>

View File

@ -22,6 +22,7 @@ func Test_parseZipWrappedJavaArchive(t *testing.T) {
expected: []string{ expected: []string{
"example-java-app-maven", "example-java-app-maven",
"joda-time", "joda-time",
"byte-buddy",
}, },
}, },
} }

View File

@ -114,6 +114,9 @@ type JavaArchive struct {
// ArchiveDigests is cryptographic hashes of the archive file // ArchiveDigests is cryptographic hashes of the archive file
ArchiveDigests []file.Digest `hash:"ignore" json:"digest,omitempty"` ArchiveDigests []file.Digest `hash:"ignore" json:"digest,omitempty"`
// ContainedPackages is a list of all package names contained in the jar
ContainedPackages []string `mapstructure:"ContainedPackages" json:"containedPackages"`
// Parent is reference to parent package (for nested archives) // Parent is reference to parent package (for nested archives)
Parent *Package `hash:"ignore" json:"-"` Parent *Package `hash:"ignore" json:"-"`
} }