mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 17:03:17 +01:00
feat: add the option to retrieve remote licenses for projects defined in a maven pom (#2409)
Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
This commit is contained in:
parent
790ecc6f28
commit
e789e0714d
@ -83,7 +83,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
|
||||
haskell.NewHackageCataloger(),
|
||||
java.NewArchiveCataloger(cfg.JavaConfig()),
|
||||
java.NewGradleLockfileCataloger(),
|
||||
java.NewPomCataloger(),
|
||||
java.NewPomCataloger(cfg.JavaConfig()),
|
||||
java.NewNativeImageCataloger(),
|
||||
javascript.NewLockCataloger(cfg.Javascript),
|
||||
nix.NewStoreCataloger(),
|
||||
@ -122,7 +122,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
|
||||
haskell.NewHackageCataloger(),
|
||||
java.NewArchiveCataloger(cfg.JavaConfig()),
|
||||
java.NewGradleLockfileCataloger(),
|
||||
java.NewPomCataloger(),
|
||||
java.NewPomCataloger(cfg.JavaConfig()),
|
||||
java.NewNativeImageCataloger(),
|
||||
javascript.NewLockCataloger(cfg.Javascript),
|
||||
javascript.NewPackageCataloger(),
|
||||
|
||||
@ -3,15 +3,9 @@ package java
|
||||
import (
|
||||
"crypto"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/vifraa/gopom"
|
||||
|
||||
intFile "github.com/anchore/syft/internal/file"
|
||||
"github.com/anchore/syft/internal/licenses"
|
||||
@ -359,98 +353,6 @@ func findPomLicenses(pomProjectObject *parsedPomProject, cfg ArchiveCatalogerCon
|
||||
}
|
||||
}
|
||||
|
||||
// formatMavenPomURL builds the URL of an artifact's POM file below mavenBaseURL.
//
// The dotted groupID is split into one path segment per component, followed by
// the artifactID, the version and finally "<artifactID>-<version>.pom". An
// error is returned when the segments cannot be joined onto mavenBaseURL
// (for example because mavenBaseURL is not a parseable URL).
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	// e.g. https://repo1.maven.org/maven2/<group/as/path>/<artifactID>/<version>/<artifactID>-<version>.pom
	pomFile := fmt.Sprintf("%s-%s.pom", artifactID, version)

	// "org.example" must become the path segments "org", "example".
	segments := strings.Split(groupID, ".")
	segments = append(segments, artifactID, version, pomFile)

	joined, joinErr := url.JoinPath(mavenBaseURL, segments...)
	if joinErr != nil {
		return joined, fmt.Errorf("could not construct maven url: %w", joinErr)
	}
	return joined, nil
}
|
||||
|
||||
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
|
||||
var licenses []string
|
||||
// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
|
||||
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
|
||||
parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
|
||||
if err != nil {
|
||||
// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
|
||||
log.Tracef("unable to get parent pom from Maven central: %v", err)
|
||||
return []string{}
|
||||
}
|
||||
parentLicenses := parseLicensesFromPom(parentPom)
|
||||
if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
|
||||
licenses = parentLicenses
|
||||
break
|
||||
}
|
||||
|
||||
groupID = *parentPom.Parent.GroupID
|
||||
artifactID = *parentPom.Parent.ArtifactID
|
||||
version = *parentPom.Parent.Version
|
||||
}
|
||||
|
||||
return licenses
|
||||
}
|
||||
|
||||
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
|
||||
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
|
||||
|
||||
mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Timeout: time.Second * 10,
|
||||
}
|
||||
|
||||
resp, err := httpClient.Do(mavenRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
log.Errorf("unable to close body: %+v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
bytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||
}
|
||||
|
||||
pom, err := decodePomXML(strings.NewReader(string(bytes)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||
}
|
||||
|
||||
return &pom, nil
|
||||
}
|
||||
|
||||
func parseLicensesFromPom(pom *gopom.Project) []string {
|
||||
var licenses []string
|
||||
if pom != nil && pom.Licenses != nil {
|
||||
for _, license := range *pom.Licenses {
|
||||
if license.Name != nil {
|
||||
licenses = append(licenses, *license.Name)
|
||||
} else if license.URL != nil {
|
||||
licenses = append(licenses, *license.URL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return licenses
|
||||
}
|
||||
|
||||
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
|
||||
// parent package, returning all listed Java packages found for each pom
|
||||
// properties discovered and potentially updating the given parentPkg with new
|
||||
|
||||
@ -31,9 +31,11 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger {
|
||||
|
||||
// NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
|
||||
// Pom files list dependencies that may not be locally installed yet.
|
||||
func NewPomCataloger() pkg.Cataloger {
|
||||
func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
|
||||
gap := newGenericArchiveParserAdapter(cfg)
|
||||
|
||||
return generic.NewCataloger("java-pom-cataloger").
|
||||
WithParserByGlobs(parserPomXML, "**/pom.xml")
|
||||
WithParserByGlobs(gap.parserPomXML, "**/pom.xml")
|
||||
}
|
||||
|
||||
// NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.
|
||||
|
||||
@ -89,7 +89,15 @@ func Test_POMCataloger_Globs(t *testing.T) {
|
||||
pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, test.fixture).
|
||||
ExpectsResolverContentQueries(test.expected).
|
||||
TestCataloger(t, NewPomCataloger())
|
||||
TestCataloger(t,
|
||||
NewPomCataloger(
|
||||
ArchiveCatalogerConfig{
|
||||
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||
IncludeIndexedArchives: true,
|
||||
IncludeUnindexedArchives: true,
|
||||
},
|
||||
},
|
||||
))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
106
syft/pkg/cataloger/java/maven_repo_utils.go
Normal file
106
syft/pkg/cataloger/java/maven_repo_utils.go
Normal file
@ -0,0 +1,106 @@
|
||||
package java
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/vifraa/gopom"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
)
|
||||
|
||||
// formatMavenPomURL builds the URL of an artifact's POM file below mavenBaseURL.
//
// The dotted groupID is split into one path segment per component, followed by
// the artifactID, the version and finally "<artifactID>-<version>.pom". An
// error is returned when the segments cannot be joined onto mavenBaseURL
// (for example because mavenBaseURL is not a parseable URL).
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	// e.g. https://repo1.maven.org/maven2/<group/as/path>/<artifactID>/<version>/<artifactID>-<version>.pom
	pomFile := fmt.Sprintf("%s-%s.pom", artifactID, version)

	// "org.example" must become the path segments "org", "example".
	segments := strings.Split(groupID, ".")
	segments = append(segments, artifactID, version, pomFile)

	joined, joinErr := url.JoinPath(mavenBaseURL, segments...)
	if joinErr != nil {
		return joined, fmt.Errorf("could not construct maven url: %w", joinErr)
	}
	return joined, nil
}
|
||||
|
||||
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
|
||||
var licenses []string
|
||||
// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
|
||||
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
|
||||
parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
|
||||
if err != nil {
|
||||
// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
|
||||
log.Tracef("unable to get parent pom from Maven central: %v", err)
|
||||
return []string{}
|
||||
}
|
||||
parentLicenses := parseLicensesFromPom(parentPom)
|
||||
if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
|
||||
licenses = parentLicenses
|
||||
break
|
||||
}
|
||||
|
||||
groupID = *parentPom.Parent.GroupID
|
||||
artifactID = *parentPom.Parent.ArtifactID
|
||||
version = *parentPom.Parent.Version
|
||||
}
|
||||
|
||||
return licenses
|
||||
}
|
||||
|
||||
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
|
||||
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
|
||||
|
||||
mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Timeout: time.Second * 10,
|
||||
}
|
||||
|
||||
resp, err := httpClient.Do(mavenRequest)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := resp.Body.Close(); err != nil {
|
||||
log.Errorf("unable to close body: %+v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
bytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||
}
|
||||
|
||||
pom, err := decodePomXML(strings.NewReader(string(bytes)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||
}
|
||||
|
||||
return &pom, nil
|
||||
}
|
||||
|
||||
func parseLicensesFromPom(pom *gopom.Project) []string {
|
||||
var licenses []string
|
||||
if pom != nil && pom.Licenses != nil {
|
||||
for _, license := range *pom.Licenses {
|
||||
if license.Name != nil {
|
||||
licenses = append(licenses, *license.Name)
|
||||
} else if license.URL != nil {
|
||||
licenses = append(licenses, *license.URL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return licenses
|
||||
}
|
||||
@ -24,7 +24,7 @@ const pomXMLGlob = "*pom.xml"
|
||||
|
||||
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
|
||||
|
||||
func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
func (gap genericArchiveParserAdapter) parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
pom, err := decodePomXML(reader)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
@ -36,6 +36,7 @@ func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationR
|
||||
p := newPackageFromPom(
|
||||
pom,
|
||||
dep,
|
||||
gap.cfg,
|
||||
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||
)
|
||||
if p.Name == "" {
|
||||
@ -97,7 +98,7 @@ func newPomProject(path string, p gopom.Project, location file.Location) *parsed
|
||||
}
|
||||
}
|
||||
|
||||
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
|
||||
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package {
|
||||
m := pkg.JavaArchive{
|
||||
PomProperties: &pkg.JavaPomProperties{
|
||||
GroupID: resolveProperty(pom, dep.GroupID, "groupId"),
|
||||
@ -109,10 +110,26 @@ func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...fil
|
||||
name := safeString(dep.ArtifactID)
|
||||
version := resolveProperty(pom, dep.Version, "version")
|
||||
|
||||
licenses := make([]pkg.License, 0)
|
||||
if version != "" && cfg.UseNetwork {
|
||||
parentLicenses := recursivelyFindLicensesFromParentPom(
|
||||
m.PomProperties.GroupID,
|
||||
m.PomProperties.ArtifactID,
|
||||
version,
|
||||
cfg)
|
||||
|
||||
if len(parentLicenses) > 0 {
|
||||
for _, licenseName := range parentLicenses {
|
||||
licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p := pkg.Package{
|
||||
Name: name,
|
||||
Version: version,
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Licenses: pkg.NewLicenseSet(licenses...),
|
||||
PURL: packageURL(name, version, m),
|
||||
Language: pkg.Java,
|
||||
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
|
||||
|
||||
@ -11,6 +11,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/vifraa/gopom"
|
||||
|
||||
"github.com/anchore/syft/syft/cataloging"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/license"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
@ -61,7 +62,15 @@ func Test_parserPomXML(t *testing.T) {
|
||||
for i := range test.expected {
|
||||
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
||||
}
|
||||
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
|
||||
|
||||
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
|
||||
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||
IncludeIndexedArchives: true,
|
||||
IncludeUnindexedArchives: true,
|
||||
},
|
||||
})
|
||||
|
||||
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -276,7 +285,14 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
|
||||
for i := range test.expected {
|
||||
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
||||
}
|
||||
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
|
||||
|
||||
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
|
||||
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||
IncludeIndexedArchives: true,
|
||||
IncludeUnindexedArchives: true,
|
||||
},
|
||||
})
|
||||
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user