Read a license from a parent pom stored in Maven Central (#2228)

* Read a license from a parent pom stored in Maven Central
---------
Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
Co-authored-by: Christopher Phillips <christopher.phillips@anchore.com>
This commit is contained in:
Colm O hEigeartaigh 2023-10-30 21:48:16 +00:00 committed by GitHub
parent 262423b1e8
commit 78ac2f9797
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 264 additions and 41 deletions

View File

@ -601,6 +601,15 @@ golang:
# if unset this defaults to $GONOPROXY # if unset this defaults to $GONOPROXY
# SYFT_GOLANG_NOPROXY env var # SYFT_GOLANG_NOPROXY env var
no-proxy: "" no-proxy: ""
java:
# when running across pom.xml files that could have more information, syft will
# explicitly search maven for license information by querying the online pom when this is true, e.g.:
# https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-test/3.1.5/spring-boot-starter-test-3.1.5.pom
# this option is helpful for when the parent pom has this information,
# but it is not accessible from within the final built artifact
search-maven-for-licenses: false
maven-url: "https://repo1.maven.org/maven2"
linux-kernel: linux-kernel:
# whether to catalog linux kernel modules found within lib/modules/** directories # whether to catalog linux kernel modules found within lib/modules/** directories

View File

@ -14,6 +14,7 @@ import (
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg/cataloger" "github.com/anchore/syft/syft/pkg/cataloger"
golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang" golangCataloger "github.com/anchore/syft/syft/pkg/cataloger/golang"
javaCataloger "github.com/anchore/syft/syft/pkg/cataloger/java"
"github.com/anchore/syft/syft/pkg/cataloger/kernel" "github.com/anchore/syft/syft/pkg/cataloger/kernel"
pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python" pythonCataloger "github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
@ -23,6 +24,7 @@ type Catalog struct {
Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"` Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"` Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"` Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
Java java `yaml:"java" json:"java" mapstructure:"java"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"` LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"` Python python `yaml:"python" json:"python" mapstructure:"python"`
FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
@ -137,6 +139,9 @@ func (cfg Catalog) ToCatalogerConfig() cataloger.Config {
LinuxKernel: kernel.LinuxCatalogerConfig{ LinuxKernel: kernel.LinuxCatalogerConfig{
CatalogModules: cfg.LinuxKernel.CatalogModules, CatalogModules: cfg.LinuxKernel.CatalogModules,
}, },
Java: javaCataloger.DefaultCatalogerOpts().
WithSearchMavenForLicenses(cfg.Java.SearchMavenForLicenses).
WithMavenCentralURL(cfg.Java.MavenURL),
Python: pythonCataloger.CatalogerConfig{ Python: pythonCataloger.CatalogerConfig{
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements,
}, },

View File

@ -0,0 +1,6 @@
package options
// java holds the options of the "java" section of the application
// configuration.
type java struct {
// SearchMavenForLicenses enables looking up license information in the
// online pom on Maven when it is not available in the scanned artifact.
SearchMavenForLicenses bool `yaml:"search-maven-for-licenses" json:"search-maven-for-licenses" mapstructure:"search-maven-for-licenses"`
// MavenURL is the base URL of the Maven repository (config key "maven-url").
MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"`
}

View File

@ -49,7 +49,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
deb.NewDpkgdbCataloger(), deb.NewDpkgdbCataloger(),
dotnet.NewDotnetPortableExecutableCataloger(), dotnet.NewDotnetPortableExecutableCataloger(),
golang.NewGoModuleBinaryCataloger(cfg.Golang), golang.NewGoModuleBinaryCataloger(cfg.Golang),
java.NewJavaCataloger(cfg.Java()), java.NewJavaCataloger(cfg.JavaConfig()),
java.NewNativeImageCataloger(), java.NewNativeImageCataloger(),
javascript.NewPackageCataloger(), javascript.NewPackageCataloger(),
nix.NewStoreCataloger(), nix.NewStoreCataloger(),
@ -81,7 +81,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
golang.NewGoModFileCataloger(cfg.Golang), golang.NewGoModFileCataloger(cfg.Golang),
golang.NewGoModuleBinaryCataloger(cfg.Golang), golang.NewGoModuleBinaryCataloger(cfg.Golang),
haskell.NewHackageCataloger(), haskell.NewHackageCataloger(),
java.NewJavaCataloger(cfg.Java()), java.NewJavaCataloger(cfg.JavaConfig()),
java.NewJavaGradleLockfileCataloger(), java.NewJavaGradleLockfileCataloger(),
java.NewJavaPomCataloger(), java.NewJavaPomCataloger(),
java.NewNativeImageCataloger(), java.NewNativeImageCataloger(),
@ -120,7 +120,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
golang.NewGoModFileCataloger(cfg.Golang), golang.NewGoModFileCataloger(cfg.Golang),
golang.NewGoModuleBinaryCataloger(cfg.Golang), golang.NewGoModuleBinaryCataloger(cfg.Golang),
haskell.NewHackageCataloger(), haskell.NewHackageCataloger(),
java.NewJavaCataloger(cfg.Java()), java.NewJavaCataloger(cfg.JavaConfig()),
java.NewJavaGradleLockfileCataloger(), java.NewJavaGradleLockfileCataloger(),
java.NewJavaPomCataloger(), java.NewJavaPomCataloger(),
java.NewNativeImageCataloger(), java.NewNativeImageCataloger(),

View File

@ -13,6 +13,7 @@ type Config struct {
Golang golang.GoCatalogerOpts Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig Python python.CatalogerConfig
Java java.CatalogerOpts
Catalogers []string Catalogers []string
Parallelism int Parallelism int
ExcludeBinaryOverlapByOwnership bool ExcludeBinaryOverlapByOwnership bool
@ -24,13 +25,18 @@ func DefaultConfig() Config {
Parallelism: 1, Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(), Python: python.DefaultCatalogerConfig(),
Java: java.DefaultCatalogerOpts(),
ExcludeBinaryOverlapByOwnership: true, ExcludeBinaryOverlapByOwnership: true,
} }
} }
func (c Config) Java() java.Config { // JavaConfig merges relevant config values from Config to return a java.Config struct.
// Values like IncludeUnindexedArchives and IncludeIndexedArchives are used across catalogers
// and are not specific to Java, which is why this merge is required.
func (c Config) JavaConfig() java.Config {
return java.Config{ return java.Config{
SearchUnindexedArchives: c.Search.IncludeUnindexedArchives, SearchUnindexedArchives: c.Search.IncludeUnindexedArchives,
SearchIndexedArchives: c.Search.IncludeIndexedArchives, SearchIndexedArchives: c.Search.IncludeIndexedArchives,
SearchMavenForLicenses: c.Java.SearchMavenForLicenses,
} }
} }

View File

@ -3,9 +3,15 @@ package java
import ( import (
"crypto" "crypto"
"fmt" "fmt"
"io"
"net/http"
"net/url"
"os" "os"
"path" "path"
"strings" "strings"
"time"
"github.com/vifraa/gopom"
intFile "github.com/anchore/syft/internal/file" intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/licenses" "github.com/anchore/syft/internal/licenses"
@ -16,8 +22,6 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
var _ generic.Parser = parseJavaArchive
var archiveFormatGlobs = []string{ var archiveFormatGlobs = []string{
"**/*.jar", "**/*.jar",
"**/*.war", "**/*.war",
@ -49,11 +53,20 @@ type archiveParser struct {
contentPath string contentPath string
fileInfo archiveFilename fileInfo archiveFilename
detectNested bool detectNested bool
cfg Config
}
type genericArchiveParserAdapter struct {
cfg Config
}
func newGenericArchiveParserAdapter(cfg Config) genericArchiveParserAdapter {
return genericArchiveParserAdapter{cfg: cfg}
} }
// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. // parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func parseJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (gap genericArchiveParserAdapter) parseJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(reader, true) parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions // note: even on error, we should always run cleanup functions
defer cleanupFn() defer cleanupFn()
if err != nil { if err != nil {
@ -72,7 +85,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them. // and parse nested archives or ignore them.
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool) (*archiveParser, func(), error) { func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg Config) (*archiveParser, func(), error) {
// fetch the last element of the virtual path // fetch the last element of the virtual path
virtualElements := strings.Split(reader.AccessPath(), ":") virtualElements := strings.Split(reader.AccessPath(), ":")
currentFilepath := virtualElements[len(virtualElements)-1] currentFilepath := virtualElements[len(virtualElements)-1]
@ -94,6 +107,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool) (*a
contentPath: contentPath, contentPath: contentPath,
fileInfo: newJavaArchiveFilename(currentFilepath), fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested, detectNested: detectNested,
cfg: cfg,
}, cleanupFn, nil }, cleanupFn, nil
} }
@ -248,7 +262,7 @@ func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, versi
pomPropertyMatches := j.fileManifest.GlobMatch(false, pomPropertiesGlob) pomPropertyMatches := j.fileManifest.GlobMatch(false, pomPropertiesGlob)
pomMatches := j.fileManifest.GlobMatch(false, pomXMLGlob) pomMatches := j.fileManifest.GlobMatch(false, pomXMLGlob)
var pomPropertiesObject pkg.JavaPomProperties var pomPropertiesObject pkg.JavaPomProperties
var pomProjectObject parsedPomProject var pomProjectObject *parsedPomProject
if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 { if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 {
// we have exactly 1 pom.properties or pom.xml in the archive; assume it represents the // we have exactly 1 pom.properties or pom.xml in the archive; assume it represents the
// package we're scanning if the names seem like a plausible match // package we're scanning if the names seem like a plausible match
@ -265,14 +279,22 @@ func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, versi
} }
} }
name = pomPropertiesObject.ArtifactID name = pomPropertiesObject.ArtifactID
if name == "" && pomProjectObject.JavaPomProject != nil { if name == "" && pomProjectObject != nil {
name = pomProjectObject.ArtifactID name = pomProjectObject.ArtifactID
} }
version = pomPropertiesObject.Version version = pomPropertiesObject.Version
if version == "" && pomProjectObject.JavaPomProject != nil { if version == "" && pomProjectObject != nil {
version = pomProjectObject.Version version = pomProjectObject.Version
} }
return name, version, pomProjectObject.Licenses if pomProjectObject != nil && j.cfg.SearchMavenForLicenses {
findPomLicenses(pomProjectObject)
}
if pomProjectObject != nil {
licenses = pomProjectObject.Licenses
}
return name, version, licenses
} }
func artifactIDMatchesFilename(artifactID, fileName string) bool { func artifactIDMatchesFilename(artifactID, fileName string) bool {
@ -282,6 +304,92 @@ func artifactIDMatchesFilename(artifactID, fileName string) bool {
return strings.HasPrefix(artifactID, fileName) || strings.HasSuffix(fileName, artifactID) return strings.HasPrefix(artifactID, fileName) || strings.HasSuffix(fileName, artifactID)
} }
// findPomLicenses fills in pomProjectObject.Licenses from the project's parent
// pom when the project itself declares none.
//
// The parent pom is fetched with getPomFromMavenCentral using the parent's
// group ID, artifact ID and version. Each license found there is appended to
// pomProjectObject.Licenses. Only the direct parent is consulted; the parent's
// own ancestors are not walked.
//
// The call is a no-op when pomProjectObject is nil, has no parent, or already
// has at least one license. A failed fetch is logged at trace level and
// otherwise ignored, because the parent pom may simply not be published.
func findPomLicenses(pomProjectObject *parsedPomProject) {
	if pomProjectObject == nil || pomProjectObject.Parent == nil {
		return
	}
	if len(pomProjectObject.Licenses) > 0 {
		// the project already carries its own license information
		return
	}

	parent := pomProjectObject.Parent
	parentPom, err := getPomFromMavenCentral(parent.GroupID, parent.ArtifactID, parent.Version)
	if err != nil {
		// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
		log.Tracef("unable to get parent pom from Maven central: %v", err)
		return
	}

	// ranging over an empty result appends nothing, so no extra guard is needed
	for _, licenseName := range parseLicensesFromPom(parentPom) {
		pomProjectObject.Licenses = append(pomProjectObject.Licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
	}
}
// formatMavenPomURL builds the URL of the pom file for the given Maven
// coordinates underneath MavenBaseURL.
//
// The dots of groupID become path separators (org.example -> org/example),
// followed by artifactID, version and the file name
// "<artifactID>-<version>.pom", e.g.
// https://repo1.maven.org/maven2/groupID/artifactID/version/artifactID-version.pom
//
// An error is returned, together with an empty URL, when any coordinate is
// empty or when the URL cannot be joined.
//
// NOTE(review): the base is always the package constant MavenBaseURL; the
// MavenBaseURL field of Config is not consulted here. Confirm that is intended.
func formatMavenPomURL(groupID, artifactID, version string) (requestURL string, err error) {
	// an empty coordinate would silently collapse out of the joined path and
	// yield a URL that points at the wrong location
	if groupID == "" || artifactID == "" || version == "" {
		return "", fmt.Errorf("could not construct maven url: incomplete coordinates (groupID=%q, artifactID=%q, version=%q)", groupID, artifactID, version)
	}

	// groupID needs to go from maven.org -> maven/org
	urlPath := strings.Split(groupID, ".")
	artifactPom := fmt.Sprintf("%s-%s.pom", artifactID, version)
	urlPath = append(urlPath, artifactID, version, artifactPom)

	requestURL, err = url.JoinPath(MavenBaseURL, urlPath...)
	if err != nil {
		return "", fmt.Errorf("could not construct maven url: %w", err)
	}
	return requestURL, nil
}
// getPomFromMavenCentral downloads and decodes the pom for the given Maven
// coordinates. The request URL is produced by formatMavenPomURL.
//
// An error (and a nil project) is returned when the URL cannot be built, the
// request cannot be created or fails, the server answers with anything other
// than 200 OK, or the response body cannot be read or decoded as a pom. The
// HTTP client is limited to 10 seconds per request.
func getPomFromMavenCentral(groupID, artifactID, version string) (*gopom.Project, error) {
	requestURL, err := formatMavenPomURL(groupID, artifactID, version)
	if err != nil {
		return nil, err
	}
	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)

	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(mavenRequest)
	if err != nil {
		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// a missing pom is answered with an error page, not a pom; do not hand
	// that body to the decoder
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
	}

	contents, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
	}

	pom, err := decodePomXML(strings.NewReader(string(contents)))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
	}

	return &pom, nil
}
// parseLicensesFromPom collects one identifier per license entry declared in
// pom: the entry's name when it has one, otherwise its URL. Entries with
// neither are skipped. A nil pom, or a pom without a license section, yields
// a nil slice.
func parseLicensesFromPom(pom *gopom.Project) []string {
	if pom == nil || pom.Licenses == nil {
		return nil
	}

	var found []string
	for _, entry := range *pom.Licenses {
		switch {
		case entry.Name != nil:
			found = append(found, *entry.Name)
		case entry.URL != nil:
			found = append(found, *entry.URL)
		}
	}
	return found
}
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given // discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom // parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new // properties discovered and potentially updating the given parentPkg with new
@ -308,7 +416,7 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
for parentPath, propertiesObj := range properties { for parentPath, propertiesObj := range properties {
var pomProject *parsedPomProject var pomProject *parsedPomProject
if proj, exists := projects[parentPath]; exists { if proj, exists := projects[parentPath]; exists {
pomProject = &proj pomProject = proj
} }
pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.location) pkgFromPom := newPackageFromMavenData(propertiesObj, pomProject, parentPkg, j.location)
@ -370,28 +478,28 @@ func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) {
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
// we know that all java archives are zip formatted files, so we can use the shared zip helper // we know that all java archives are zip formatted files, so we can use the shared zip helper
return discoverPkgsFromZip(j.location, j.archivePath, j.contentPath, j.fileManifest, parentPkg) return discoverPkgsFromZip(j.location, j.archivePath, j.contentPath, j.fileManifest, parentPkg, j.cfg)
} }
// discoverPkgsFromZip finds Java archives within Java archives, returning all listed Java packages found and // discoverPkgsFromZip finds Java archives within Java archives, returning all listed Java packages found and
// associating each discovered package to the given parent package. // associating each discovered package to the given parent package.
func discoverPkgsFromZip(location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromZip(location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg Config) ([]pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents // search and parse pom.properties files & fetch the contents
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
} }
return discoverPkgsFromOpeners(location, openers, parentPkg) return discoverPkgsFromOpeners(location, openers, parentPkg, cfg)
} }
// discoverPkgsFromOpeners finds Java archives within the given files and associates them with the given parent package. // discoverPkgsFromOpeners finds Java archives within the given files and associates them with the given parent package.
func discoverPkgsFromOpeners(location file.Location, openers map[string]intFile.Opener, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromOpeners(location file.Location, openers map[string]intFile.Opener, parentPkg *pkg.Package, cfg Config) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship
for pathWithinArchive, archiveOpener := range openers { for pathWithinArchive, archiveOpener := range openers {
nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(location, pathWithinArchive, archiveOpener) nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(location, pathWithinArchive, archiveOpener, cfg)
if err != nil { if err != nil {
log.WithFields("location", location.AccessPath()).Warnf("unable to discover java packages from opener: %+v", err) log.WithFields("location", location.AccessPath()).Warnf("unable to discover java packages from opener: %+v", err)
continue continue
@ -415,7 +523,7 @@ func discoverPkgsFromOpeners(location file.Location, openers map[string]intFile.
} }
// discoverPkgsFromOpener finds Java archives within the given file. // discoverPkgsFromOpener finds Java archives within the given file.
func discoverPkgsFromOpener(location file.Location, pathWithinArchive string, archiveOpener intFile.Opener) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromOpener(location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg Config) ([]pkg.Package, []artifact.Relationship, error) {
archiveReadCloser, err := archiveOpener.Open() archiveReadCloser, err := archiveOpener.Open()
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
@ -429,7 +537,8 @@ func discoverPkgsFromOpener(location file.Location, pathWithinArchive string, ar
nestedPath := fmt.Sprintf("%s:%s", location.AccessPath(), pathWithinArchive) nestedPath := fmt.Sprintf("%s:%s", location.AccessPath(), pathWithinArchive)
nestedLocation := file.NewLocationFromCoordinates(location.Coordinates) nestedLocation := file.NewLocationFromCoordinates(location.Coordinates)
nestedLocation.VirtualPath = nestedPath nestedLocation.VirtualPath = nestedPath
nestedPkgs, nestedRelationships, err := parseJavaArchive(nil, nil, file.LocationReadCloser{ gap := newGenericArchiveParserAdapter(cfg)
nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(nil, nil, file.LocationReadCloser{
Location: nestedLocation, Location: nestedLocation,
ReadCloser: archiveReadCloser, ReadCloser: archiveReadCloser,
}) })
@ -469,13 +578,13 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
return propertiesByParentPath, nil return propertiesByParentPath, nil
} }
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]parsedPomProject, error) { func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
} }
projectByParentPath := make(map[string]parsedPomProject) projectByParentPath := make(map[string]*parsedPomProject)
for filePath, fileContents := range contentsOfMavenProjectFiles { for filePath, fileContents := range contentsOfMavenProjectFiles {
// TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar // TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar
pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents), location) pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents), location)
@ -488,12 +597,13 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP
continue continue
} }
if pomProject.Version == "" || pomProject.ArtifactID == "" { // If we don't have a version, then maybe the parent pom has it...
if (pomProject.Parent == nil && pomProject.Version == "") || pomProject.ArtifactID == "" {
// TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package // TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package
continue continue
} }
projectByParentPath[path.Dir(filePath)] = *pomProject projectByParentPath[path.Dir(filePath)] = pomProject
} }
return projectByParentPath, nil return projectByParentPath, nil
} }

View File

@ -44,6 +44,32 @@ func generateJavaBuildFixture(t *testing.T, fixturePath string) {
run(t, cmd) run(t, cmd)
} }
// TestFormatMavenURL checks that formatMavenPomURL turns Maven coordinates
// into the expected pom URL.
func TestFormatMavenURL(t *testing.T) {
	tests := []struct {
		name       string
		groupID    string
		artifactID string
		version    string
		expected   string
	}{
		{
			name:       "formatMavenURL correctly assembles the pom URL",
			groupID:    "org.springframework.boot",
			artifactID: "spring-boot-starter-test",
			version:    "3.1.5",
			expected:   "https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-test/3.1.5/spring-boot-starter-test-3.1.5.pom",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			requestURL, err := formatMavenPomURL(tc.groupID, tc.artifactID, tc.version)
			// %w is only understood by fmt.Errorf; use %v so the failure
			// message renders the error instead of "%!w(...)"
			assert.NoError(t, err, "expected no err; got %v", err)
			assert.Equal(t, tc.expected, requestURL)
		})
	}
}
func TestParseJar(t *testing.T) { func TestParseJar(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
@ -232,8 +258,9 @@ func TestParseJar(t *testing.T) {
return &l return &l
}(), }(),
), ),
), Language: pkg.Java, ),
Type: pkg.JavaPkg, Language: pkg.Java,
Type: pkg.JavaPkg,
Metadata: pkg.JavaArchive{ Metadata: pkg.JavaArchive{
// ensure that nested packages with different names than that of the parent are appended as // ensure that nested packages with different names than that of the parent are appended as
// a suffix on the virtual path // a suffix on the virtual path
@ -276,7 +303,7 @@ func TestParseJar(t *testing.T) {
parser, cleanupFn, err := newJavaArchiveParser(file.LocationReadCloser{ parser, cleanupFn, err := newJavaArchiveParser(file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,
}, false) }, false, Config{SearchMavenForLicenses: false})
defer cleanupFn() defer cleanupFn()
require.NoError(t, err) require.NoError(t, err)
@ -542,8 +569,9 @@ func TestParseNestedJar(t *testing.T) {
fixture, err := os.Open(test.fixture) fixture, err := os.Open(test.fixture)
require.NoError(t, err) require.NoError(t, err)
gap := newGenericArchiveParserAdapter(Config{})
actual, _, err := parseJavaArchive(nil, nil, file.LocationReadCloser{ actual, _, err := gap.parseJavaArchive(nil, nil, file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,
}) })
@ -1121,11 +1149,12 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
gap := newGenericArchiveParserAdapter(Config{})
pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromFile(t, generateJavaMetadataJarFixture(t, tt.fixtureName)). FromFile(t, generateJavaMetadataJarFixture(t, tt.fixtureName)).
Expects(tt.expectedPkgs, tt.expectedRelationships). Expects(tt.expectedPkgs, tt.expectedRelationships).
WithCompareOptions(cmpopts.IgnoreFields(pkg.JavaArchive{}, "ArchiveDigests")). WithCompareOptions(cmpopts.IgnoreFields(pkg.JavaArchive{}, "ArchiveDigests")).
TestParser(t, parseJavaArchive) TestParser(t, gap.parseJavaArchive)
}) })
} }
} }

View File

@ -9,17 +9,21 @@ import (
// NewJavaCataloger returns a new Java archive cataloger object. // NewJavaCataloger returns a new Java archive cataloger object.
func NewJavaCataloger(cfg Config) *generic.Cataloger { func NewJavaCataloger(cfg Config) *generic.Cataloger {
gap := newGenericArchiveParserAdapter(cfg)
c := generic.NewCataloger("java-cataloger"). c := generic.NewCataloger("java-cataloger").
WithParserByGlobs(parseJavaArchive, archiveFormatGlobs...) WithParserByGlobs(gap.parseJavaArchive, archiveFormatGlobs...)
if cfg.SearchIndexedArchives { if cfg.SearchIndexedArchives {
// java archives wrapped within zip files // java archives wrapped within zip files
c.WithParserByGlobs(parseZipWrappedJavaArchive, genericZipGlobs...) gzp := newGenericZipWrappedJavaArchiveParser(cfg)
c.WithParserByGlobs(gzp.parseZipWrappedJavaArchive, genericZipGlobs...)
} }
if cfg.SearchUnindexedArchives { if cfg.SearchUnindexedArchives {
// java archives wrapped within tar files // java archives wrapped within tar files
c.WithParserByGlobs(parseTarWrappedJavaArchive, genericTarGlobs...) gtp := newGenericTarWrappedJavaArchiveParser(cfg)
c.WithParserByGlobs(gtp.parseTarWrappedJavaArchive, genericTarGlobs...)
} }
return c return c
} }

View File

@ -3,4 +3,6 @@ package java
type Config struct { type Config struct {
SearchUnindexedArchives bool SearchUnindexedArchives bool
SearchIndexedArchives bool SearchIndexedArchives bool
SearchMavenForLicenses bool
MavenBaseURL string
} }

View File

@ -0,0 +1,27 @@
package java
// MavenBaseURL is the Maven repository root used when no other URL is
// configured.
const MavenBaseURL = "https://repo1.maven.org/maven2"

// CatalogerOpts holds the options specific to the Java cataloger.
type CatalogerOpts struct {
	// SearchMavenForLicenses enables looking up missing license information
	// in poms published on Maven.
	SearchMavenForLicenses bool
	// MavenURL is the base URL of the Maven repository.
	MavenURL string
}

// DefaultCatalogerOpts returns the options used when nothing is configured:
// no Maven license search, and MavenBaseURL as the repository.
func DefaultCatalogerOpts() CatalogerOpts {
	var opts CatalogerOpts // SearchMavenForLicenses stays at its zero value, false
	opts.MavenURL = MavenBaseURL
	return opts
}

// WithSearchMavenForLicenses returns a copy of the options with
// SearchMavenForLicenses set to input. The receiver is not modified.
func (j CatalogerOpts) WithSearchMavenForLicenses(input bool) CatalogerOpts {
	return CatalogerOpts{
		SearchMavenForLicenses: input,
		MavenURL:               j.MavenURL,
	}
}

// WithMavenCentralURL returns a copy of the options with MavenURL set to
// input. An empty input leaves the current URL in place. The receiver is not
// modified.
func (j CatalogerOpts) WithMavenCentralURL(input string) CatalogerOpts {
	if input == "" {
		return j
	}
	return CatalogerOpts{
		SearchMavenForLicenses: j.SearchMavenForLicenses,
		MavenURL:               input,
	}
}

View File

@ -45,7 +45,18 @@ var genericTarGlobs = []string{
// note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is // note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is
// due to the fact that there is no central directory header (say as in zip), which means that in order to get // due to the fact that there is no central directory header (say as in zip), which means that in order to get
// a file listing within the archive you must decompress the entire archive and seek through all of the entries. // a file listing within the archive you must decompress the entire archive and seek through all of the entries.
func parseTarWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// genericTarWrappedJavaArchiveParser binds a Config to the parser for java
// archives that are wrapped inside tar files.
type genericTarWrappedJavaArchiveParser struct {
	cfg Config
}

// newGenericTarWrappedJavaArchiveParser returns a parser carrying a copy of cfg.
func newGenericTarWrappedJavaArchiveParser(cfg Config) genericTarWrappedJavaArchiveParser {
	parser := genericTarWrappedJavaArchiveParser{cfg: cfg}
	return parser
}
func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.AccessPath(), reader) contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.AccessPath(), reader)
// note: even on error, we should always run cleanup functions // note: even on error, we should always run cleanup functions
defer cleanupFn() defer cleanupFn()
@ -54,14 +65,14 @@ func parseTarWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader
} }
// look for java archives within the tar archive // look for java archives within the tar archive
return discoverPkgsFromTar(reader.Location, archivePath, contentPath) return discoverPkgsFromTar(reader.Location, archivePath, contentPath, gtp.cfg)
} }
func discoverPkgsFromTar(location file.Location, archivePath, contentPath string) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromTar(location file.Location, archivePath, contentPath string, cfg Config) ([]pkg.Package, []artifact.Relationship, error) {
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
} }
return discoverPkgsFromOpeners(location, openers, nil) return discoverPkgsFromOpeners(location, openers, nil, cfg)
} }

View File

@ -40,7 +40,8 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err) t.Fatalf("failed to open fixture: %+v", err)
} }
actualPkgs, _, err := parseTarWrappedJavaArchive(nil, nil, file.LocationReadCloser{ gtp := newGenericTarWrappedJavaArchiveParser(Config{})
actualPkgs, _, err := gtp.parseTarWrappedJavaArchive(nil, nil, file.LocationReadCloser{
Location: file.NewLocation(test.fixture), Location: file.NewLocation(test.fixture),
ReadCloser: fixture, ReadCloser: fixture,
}) })

View File

@ -17,7 +17,18 @@ var genericZipGlobs = []string{
// TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246) // TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)
// parseZipWrappedJavaArchive is a parser function for java archive contents contained within arbitrary zip files. // parseZipWrappedJavaArchive is a parser function for java archive contents contained within arbitrary zip files.
func parseZipWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// genericZipWrappedJavaArchiveParser binds a Config to the parser for java
// archives that are wrapped inside arbitrary zip files.
type genericZipWrappedJavaArchiveParser struct {
	cfg Config
}

// newGenericZipWrappedJavaArchiveParser returns a parser carrying a copy of cfg.
func newGenericZipWrappedJavaArchiveParser(cfg Config) genericZipWrappedJavaArchiveParser {
	parser := genericZipWrappedJavaArchiveParser{cfg: cfg}
	return parser
}
func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.AccessPath(), reader) contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.AccessPath(), reader)
// note: even on error, we should always run cleanup functions // note: even on error, we should always run cleanup functions
defer cleanupFn() defer cleanupFn()
@ -35,5 +46,5 @@ func parseZipWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader
} }
// look for java archives within the zip archive // look for java archives within the zip archive
return discoverPkgsFromZip(reader.Location, archivePath, contentPath, fileManifest, nil) return discoverPkgsFromZip(reader.Location, archivePath, contentPath, fileManifest, nil, gzp.cfg)
} }

View File

@ -33,7 +33,9 @@ func Test_parseZipWrappedJavaArchive(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err) t.Fatalf("failed to open fixture: %+v", err)
} }
actualPkgs, _, err := parseZipWrappedJavaArchive(nil, nil, file.LocationReadCloser{ gzp := newGenericZipWrappedJavaArchiveParser(Config{})
actualPkgs, _, err := gzp.parseZipWrappedJavaArchive(nil, nil, file.LocationReadCloser{
Location: file.NewLocation(test.fixture), Location: file.NewLocation(test.fixture),
ReadCloser: fixture, ReadCloser: fixture,
}) })