mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 17:03:17 +01:00
feat: add the option to retrieve remote licenses for projects defined in a maven pom (#2409)
Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
This commit is contained in:
parent
790ecc6f28
commit
e789e0714d
@ -83,7 +83,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
|
|||||||
haskell.NewHackageCataloger(),
|
haskell.NewHackageCataloger(),
|
||||||
java.NewArchiveCataloger(cfg.JavaConfig()),
|
java.NewArchiveCataloger(cfg.JavaConfig()),
|
||||||
java.NewGradleLockfileCataloger(),
|
java.NewGradleLockfileCataloger(),
|
||||||
java.NewPomCataloger(),
|
java.NewPomCataloger(cfg.JavaConfig()),
|
||||||
java.NewNativeImageCataloger(),
|
java.NewNativeImageCataloger(),
|
||||||
javascript.NewLockCataloger(cfg.Javascript),
|
javascript.NewLockCataloger(cfg.Javascript),
|
||||||
nix.NewStoreCataloger(),
|
nix.NewStoreCataloger(),
|
||||||
@ -122,7 +122,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
|
|||||||
haskell.NewHackageCataloger(),
|
haskell.NewHackageCataloger(),
|
||||||
java.NewArchiveCataloger(cfg.JavaConfig()),
|
java.NewArchiveCataloger(cfg.JavaConfig()),
|
||||||
java.NewGradleLockfileCataloger(),
|
java.NewGradleLockfileCataloger(),
|
||||||
java.NewPomCataloger(),
|
java.NewPomCataloger(cfg.JavaConfig()),
|
||||||
java.NewNativeImageCataloger(),
|
java.NewNativeImageCataloger(),
|
||||||
javascript.NewLockCataloger(cfg.Javascript),
|
javascript.NewLockCataloger(cfg.Javascript),
|
||||||
javascript.NewPackageCataloger(),
|
javascript.NewPackageCataloger(),
|
||||||
|
|||||||
@ -3,15 +3,9 @@ package java
|
|||||||
import (
|
import (
|
||||||
"crypto"
|
"crypto"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"net/url"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/vifraa/gopom"
|
|
||||||
|
|
||||||
intFile "github.com/anchore/syft/internal/file"
|
intFile "github.com/anchore/syft/internal/file"
|
||||||
"github.com/anchore/syft/internal/licenses"
|
"github.com/anchore/syft/internal/licenses"
|
||||||
@ -359,98 +353,6 @@ func findPomLicenses(pomProjectObject *parsedPomProject, cfg ArchiveCatalogerCon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
|
|
||||||
// groupID needs to go from maven.org -> maven/org
|
|
||||||
urlPath := strings.Split(groupID, ".")
|
|
||||||
artifactPom := fmt.Sprintf("%s-%s.pom", artifactID, version)
|
|
||||||
urlPath = append(urlPath, artifactID, version, artifactPom)
|
|
||||||
|
|
||||||
// ex:"https://repo1.maven.org/maven2/groupID/artifactID/artifactPom
|
|
||||||
requestURL, err = url.JoinPath(mavenBaseURL, urlPath...)
|
|
||||||
if err != nil {
|
|
||||||
return requestURL, fmt.Errorf("could not construct maven url: %w", err)
|
|
||||||
}
|
|
||||||
return requestURL, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
|
|
||||||
var licenses []string
|
|
||||||
// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
|
|
||||||
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
|
|
||||||
parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
|
|
||||||
if err != nil {
|
|
||||||
// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
|
|
||||||
log.Tracef("unable to get parent pom from Maven central: %v", err)
|
|
||||||
return []string{}
|
|
||||||
}
|
|
||||||
parentLicenses := parseLicensesFromPom(parentPom)
|
|
||||||
if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
|
|
||||||
licenses = parentLicenses
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
groupID = *parentPom.Parent.GroupID
|
|
||||||
artifactID = *parentPom.Parent.ArtifactID
|
|
||||||
version = *parentPom.Parent.Version
|
|
||||||
}
|
|
||||||
|
|
||||||
return licenses
|
|
||||||
}
|
|
||||||
|
|
||||||
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
|
|
||||||
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
|
|
||||||
|
|
||||||
mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
httpClient := &http.Client{
|
|
||||||
Timeout: time.Second * 10,
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, err := httpClient.Do(mavenRequest)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err := resp.Body.Close(); err != nil {
|
|
||||||
log.Errorf("unable to close body: %+v", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
bytes, err := io.ReadAll(resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
pom, err := decodePomXML(strings.NewReader(string(bytes)))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &pom, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func parseLicensesFromPom(pom *gopom.Project) []string {
|
|
||||||
var licenses []string
|
|
||||||
if pom != nil && pom.Licenses != nil {
|
|
||||||
for _, license := range *pom.Licenses {
|
|
||||||
if license.Name != nil {
|
|
||||||
licenses = append(licenses, *license.Name)
|
|
||||||
} else if license.URL != nil {
|
|
||||||
licenses = append(licenses, *license.URL)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return licenses
|
|
||||||
}
|
|
||||||
|
|
||||||
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
|
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
|
||||||
// parent package, returning all listed Java packages found for each pom
|
// parent package, returning all listed Java packages found for each pom
|
||||||
// properties discovered and potentially updating the given parentPkg with new
|
// properties discovered and potentially updating the given parentPkg with new
|
||||||
|
|||||||
@ -31,9 +31,11 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger {
|
|||||||
|
|
||||||
// NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
|
// NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
|
||||||
// Pom files list dependencies that may not be locally installed yet.
|
// Pom files list dependencies that may not be locally installed yet.
|
||||||
func NewPomCataloger() pkg.Cataloger {
|
func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
|
||||||
|
gap := newGenericArchiveParserAdapter(cfg)
|
||||||
|
|
||||||
return generic.NewCataloger("java-pom-cataloger").
|
return generic.NewCataloger("java-pom-cataloger").
|
||||||
WithParserByGlobs(parserPomXML, "**/pom.xml")
|
WithParserByGlobs(gap.parserPomXML, "**/pom.xml")
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.
|
// NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.
|
||||||
|
|||||||
@ -89,7 +89,15 @@ func Test_POMCataloger_Globs(t *testing.T) {
|
|||||||
pkgtest.NewCatalogTester().
|
pkgtest.NewCatalogTester().
|
||||||
FromDirectory(t, test.fixture).
|
FromDirectory(t, test.fixture).
|
||||||
ExpectsResolverContentQueries(test.expected).
|
ExpectsResolverContentQueries(test.expected).
|
||||||
TestCataloger(t, NewPomCataloger())
|
TestCataloger(t,
|
||||||
|
NewPomCataloger(
|
||||||
|
ArchiveCatalogerConfig{
|
||||||
|
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||||
|
IncludeIndexedArchives: true,
|
||||||
|
IncludeUnindexedArchives: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
106
syft/pkg/cataloger/java/maven_repo_utils.go
Normal file
106
syft/pkg/cataloger/java/maven_repo_utils.go
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
package java
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/vifraa/gopom"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// formatMavenPomURL builds the URL of an artifact's .pom file underneath
// mavenBaseURL. The dotted groupID is expanded into one path segment per
// component (org.apache -> org/apache), followed by the artifactID, the
// version, and the "<artifactID>-<version>.pom" file name, e.g.
// <mavenBaseURL>/org/apache/commons/commons-text/1.10.0/commons-text-1.10.0.pom
//
// A wrapped error is returned when the segments cannot be joined onto
// mavenBaseURL.
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	pomFileName := fmt.Sprintf("%s-%s.pom", artifactID, version)

	segments := strings.Split(groupID, ".")
	segments = append(segments, artifactID, version, pomFileName)

	requestURL, err = url.JoinPath(mavenBaseURL, segments...)
	if err == nil {
		return requestURL, nil
	}
	return requestURL, fmt.Errorf("could not construct maven url: %w", err)
}
|
||||||
|
|
||||||
|
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
|
||||||
|
var licenses []string
|
||||||
|
// As there can be nested parent poms, we'll recursively check for licenses until we reach the max depth
|
||||||
|
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
|
||||||
|
parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
|
||||||
|
if err != nil {
|
||||||
|
// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
|
||||||
|
log.Tracef("unable to get parent pom from Maven central: %v", err)
|
||||||
|
return []string{}
|
||||||
|
}
|
||||||
|
parentLicenses := parseLicensesFromPom(parentPom)
|
||||||
|
if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
|
||||||
|
licenses = parentLicenses
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
groupID = *parentPom.Parent.GroupID
|
||||||
|
artifactID = *parentPom.Parent.ArtifactID
|
||||||
|
version = *parentPom.Parent.Version
|
||||||
|
}
|
||||||
|
|
||||||
|
return licenses
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
|
||||||
|
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)
|
||||||
|
|
||||||
|
mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpClient := &http.Client{
|
||||||
|
Timeout: time.Second * 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := httpClient.Do(mavenRequest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err := resp.Body.Close(); err != nil {
|
||||||
|
log.Errorf("unable to close body: %+v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
bytes, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pom, err := decodePomXML(strings.NewReader(string(bytes)))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &pom, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseLicensesFromPom(pom *gopom.Project) []string {
|
||||||
|
var licenses []string
|
||||||
|
if pom != nil && pom.Licenses != nil {
|
||||||
|
for _, license := range *pom.Licenses {
|
||||||
|
if license.Name != nil {
|
||||||
|
licenses = append(licenses, *license.Name)
|
||||||
|
} else if license.URL != nil {
|
||||||
|
licenses = append(licenses, *license.URL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return licenses
|
||||||
|
}
|
||||||
@ -24,7 +24,7 @@ const pomXMLGlob = "*pom.xml"
|
|||||||
|
|
||||||
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
|
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
|
||||||
|
|
||||||
func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
func (gap genericArchiveParserAdapter) parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
pom, err := decodePomXML(reader)
|
pom, err := decodePomXML(reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
@ -36,6 +36,7 @@ func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationR
|
|||||||
p := newPackageFromPom(
|
p := newPackageFromPom(
|
||||||
pom,
|
pom,
|
||||||
dep,
|
dep,
|
||||||
|
gap.cfg,
|
||||||
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||||
)
|
)
|
||||||
if p.Name == "" {
|
if p.Name == "" {
|
||||||
@ -97,7 +98,7 @@ func newPomProject(path string, p gopom.Project, location file.Location) *parsed
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
|
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package {
|
||||||
m := pkg.JavaArchive{
|
m := pkg.JavaArchive{
|
||||||
PomProperties: &pkg.JavaPomProperties{
|
PomProperties: &pkg.JavaPomProperties{
|
||||||
GroupID: resolveProperty(pom, dep.GroupID, "groupId"),
|
GroupID: resolveProperty(pom, dep.GroupID, "groupId"),
|
||||||
@ -109,10 +110,26 @@ func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...fil
|
|||||||
name := safeString(dep.ArtifactID)
|
name := safeString(dep.ArtifactID)
|
||||||
version := resolveProperty(pom, dep.Version, "version")
|
version := resolveProperty(pom, dep.Version, "version")
|
||||||
|
|
||||||
|
licenses := make([]pkg.License, 0)
|
||||||
|
if version != "" && cfg.UseNetwork {
|
||||||
|
parentLicenses := recursivelyFindLicensesFromParentPom(
|
||||||
|
m.PomProperties.GroupID,
|
||||||
|
m.PomProperties.ArtifactID,
|
||||||
|
version,
|
||||||
|
cfg)
|
||||||
|
|
||||||
|
if len(parentLicenses) > 0 {
|
||||||
|
for _, licenseName := range parentLicenses {
|
||||||
|
licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
p := pkg.Package{
|
p := pkg.Package{
|
||||||
Name: name,
|
Name: name,
|
||||||
Version: version,
|
Version: version,
|
||||||
Locations: file.NewLocationSet(locations...),
|
Locations: file.NewLocationSet(locations...),
|
||||||
|
Licenses: pkg.NewLicenseSet(licenses...),
|
||||||
PURL: packageURL(name, version, m),
|
PURL: packageURL(name, version, m),
|
||||||
Language: pkg.Java,
|
Language: pkg.Java,
|
||||||
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
|
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import (
|
|||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"github.com/vifraa/gopom"
|
"github.com/vifraa/gopom"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/cataloging"
|
||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/license"
|
"github.com/anchore/syft/syft/license"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
@ -61,7 +62,15 @@ func Test_parserPomXML(t *testing.T) {
|
|||||||
for i := range test.expected {
|
for i := range test.expected {
|
||||||
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
||||||
}
|
}
|
||||||
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
|
|
||||||
|
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
|
||||||
|
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||||
|
IncludeIndexedArchives: true,
|
||||||
|
IncludeUnindexedArchives: true,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -276,7 +285,14 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
|
|||||||
for i := range test.expected {
|
for i := range test.expected {
|
||||||
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
test.expected[i].Locations.Add(file.NewLocation(test.input))
|
||||||
}
|
}
|
||||||
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
|
|
||||||
|
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
|
||||||
|
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
|
||||||
|
IncludeIndexedArchives: true,
|
||||||
|
IncludeUnindexedArchives: true,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user