feat: add the option to retrieve remote licenses for projects defined in a maven pom (#2409)

Signed-off-by: Colm O hEigeartaigh <coheigea@apache.org>
This commit is contained in:
Colm O hEigeartaigh 2023-12-12 19:02:36 +00:00 committed by GitHub
parent 790ecc6f28
commit e789e0714d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 158 additions and 107 deletions

View File

@ -83,7 +83,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
haskell.NewHackageCataloger(), haskell.NewHackageCataloger(),
java.NewArchiveCataloger(cfg.JavaConfig()), java.NewArchiveCataloger(cfg.JavaConfig()),
java.NewGradleLockfileCataloger(), java.NewGradleLockfileCataloger(),
java.NewPomCataloger(), java.NewPomCataloger(cfg.JavaConfig()),
java.NewNativeImageCataloger(), java.NewNativeImageCataloger(),
javascript.NewLockCataloger(cfg.Javascript), javascript.NewLockCataloger(cfg.Javascript),
nix.NewStoreCataloger(), nix.NewStoreCataloger(),
@ -122,7 +122,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
haskell.NewHackageCataloger(), haskell.NewHackageCataloger(),
java.NewArchiveCataloger(cfg.JavaConfig()), java.NewArchiveCataloger(cfg.JavaConfig()),
java.NewGradleLockfileCataloger(), java.NewGradleLockfileCataloger(),
java.NewPomCataloger(), java.NewPomCataloger(cfg.JavaConfig()),
java.NewNativeImageCataloger(), java.NewNativeImageCataloger(),
javascript.NewLockCataloger(cfg.Javascript), javascript.NewLockCataloger(cfg.Javascript),
javascript.NewPackageCataloger(), javascript.NewPackageCataloger(),

View File

@ -3,15 +3,9 @@ package java
import ( import (
"crypto" "crypto"
"fmt" "fmt"
"io"
"net/http"
"net/url"
"os" "os"
"path" "path"
"strings" "strings"
"time"
"github.com/vifraa/gopom"
intFile "github.com/anchore/syft/internal/file" intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/licenses" "github.com/anchore/syft/internal/licenses"
@ -359,98 +353,6 @@ func findPomLicenses(pomProjectObject *parsedPomProject, cfg ArchiveCatalogerCon
} }
} }
// formatMavenPomURL builds the URL of an artifact's .pom file below mavenBaseURL.
//
// The dotted groupID is expanded into one path segment per component
// (e.g. "org.apache" becomes "org/apache"), followed by the artifactID, the
// version and the file name "<artifactID>-<version>.pom", for example:
// https://repo1.maven.org/maven2/<group path>/<artifactID>/<version>/<artifactID>-<version>.pom
//
// A wrapped error is returned when mavenBaseURL cannot be joined with the path.
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	pomFileName := fmt.Sprintf("%s-%s.pom", artifactID, version)

	segments := strings.Split(groupID, ".")
	segments = append(segments, artifactID, version, pomFileName)

	requestURL, err = url.JoinPath(mavenBaseURL, segments...)
	if err == nil {
		return requestURL, nil
	}
	return requestURL, fmt.Errorf("could not construct maven url: %w", err)
}
// recursivelyFindLicensesFromParentPom looks for license declarations by fetching the
// pom identified by groupID/artifactID/version from cfg.MavenBaseURL and, while a pom
// declares no licenses, following its <parent> reference.
//
// At most cfg.MaxParentRecursiveDepth poms are fetched. The search stops at the first
// pom that declares licenses, at a pom without a parent, or at a parent reference whose
// coordinates are incomplete. The result holds each license's name, or its URL when the
// name is absent. An empty non-nil slice is returned when a pom cannot be fetched; nil
// is returned when nothing was found.
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
	var licenses []string
	// As there can be nested parent poms, check each level for licenses until the max depth is reached
	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
		if err != nil {
			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
			log.Tracef("unable to get parent pom from Maven central: %v", err)
			return []string{}
		}
		parentLicenses := parseLicensesFromPom(parentPom)
		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
			licenses = parentLicenses
			break
		}

		// The parent coordinates are optional pointers; a pom that omits any of them
		// cannot be resolved to a URL, so stop instead of dereferencing nil.
		parent := parentPom.Parent
		if parent.GroupID == nil || parent.ArtifactID == nil || parent.Version == nil {
			log.Tracef("parent of pom %s:%s:%s has incomplete coordinates, stopping license search", groupID, artifactID, version)
			break
		}

		groupID = *parent.GroupID
		artifactID = *parent.ArtifactID
		version = *parent.Version
	}

	return licenses
}
// getPomFromMavenRepo downloads and decodes the pom for the given coordinates from the
// Maven repository rooted at mavenBaseURL.
//
// The request uses a 10 second client timeout. An error is returned when the URL cannot
// be built, the request fails, the server answers with a status other than 200 OK, the
// body cannot be read, or the body cannot be decoded as a pom.
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
	if err != nil {
		return nil, err
	}
	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)

	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(mavenRequest)
	if err != nil {
		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// A missing artifact is answered with an error page rather than a pom; reject it
	// here so it is never handed to the XML decoder.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
	}

	pom, err := decodePomXML(strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
	}

	return &pom, nil
}
// parseLicensesFromPom collects one string per license entry declared in pom: the
// license name when it is set, otherwise the license URL. Entries with neither are
// skipped. It returns nil when pom is nil, declares no licenses section, or yields
// no usable entries.
func parseLicensesFromPom(pom *gopom.Project) []string {
	if pom == nil || pom.Licenses == nil {
		return nil
	}

	var found []string
	for _, entry := range *pom.Licenses {
		switch {
		case entry.Name != nil:
			found = append(found, *entry.Name)
		case entry.URL != nil:
			found = append(found, *entry.URL)
		}
	}
	return found
}
// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given // discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom // parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new // properties discovered and potentially updating the given parentPkg with new

View File

@ -31,9 +31,11 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger {
// NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file. // NewPomCataloger returns a cataloger capable of parsing dependencies from a pom.xml file.
// Pom files list dependencies that may not be locally installed yet. // Pom files list dependencies that may not be locally installed yet.
func NewPomCataloger() pkg.Cataloger { func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
gap := newGenericArchiveParserAdapter(cfg)
return generic.NewCataloger("java-pom-cataloger"). return generic.NewCataloger("java-pom-cataloger").
WithParserByGlobs(parserPomXML, "**/pom.xml") WithParserByGlobs(gap.parserPomXML, "**/pom.xml")
} }
// NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file. // NewGradleLockfileCataloger returns a cataloger capable of parsing dependencies from a gradle.lockfile file.

View File

@ -89,7 +89,15 @@ func Test_POMCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture). FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected). ExpectsResolverContentQueries(test.expected).
TestCataloger(t, NewPomCataloger()) TestCataloger(t,
NewPomCataloger(
ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
},
))
}) })
} }
} }

View File

@ -0,0 +1,106 @@
package java
import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/vifraa/gopom"
"github.com/anchore/syft/internal/log"
)
// formatMavenPomURL builds the URL of an artifact's .pom file below mavenBaseURL.
//
// The dotted groupID is expanded into one path segment per component
// (e.g. "org.apache" becomes "org/apache"), followed by the artifactID, the
// version and the file name "<artifactID>-<version>.pom", for example:
// https://repo1.maven.org/maven2/<group path>/<artifactID>/<version>/<artifactID>-<version>.pom
//
// A wrapped error is returned when mavenBaseURL cannot be joined with the path.
func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (requestURL string, err error) {
	pomFileName := fmt.Sprintf("%s-%s.pom", artifactID, version)

	segments := strings.Split(groupID, ".")
	segments = append(segments, artifactID, version, pomFileName)

	requestURL, err = url.JoinPath(mavenBaseURL, segments...)
	if err == nil {
		return requestURL, nil
	}
	return requestURL, fmt.Errorf("could not construct maven url: %w", err)
}
// recursivelyFindLicensesFromParentPom looks for license declarations by fetching the
// pom identified by groupID/artifactID/version from cfg.MavenBaseURL and, while a pom
// declares no licenses, following its <parent> reference.
//
// At most cfg.MaxParentRecursiveDepth poms are fetched. The search stops at the first
// pom that declares licenses, at a pom without a parent, or at a parent reference whose
// coordinates are incomplete. The result holds each license's name, or its URL when the
// name is absent. An empty non-nil slice is returned when a pom cannot be fetched; nil
// is returned when nothing was found.
func recursivelyFindLicensesFromParentPom(groupID, artifactID, version string, cfg ArchiveCatalogerConfig) []string {
	var licenses []string
	// As there can be nested parent poms, check each level for licenses until the max depth is reached
	for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
		parentPom, err := getPomFromMavenRepo(groupID, artifactID, version, cfg.MavenBaseURL)
		if err != nil {
			// We don't want to abort here as the parent pom might not exist in Maven Central, we'll just log the error
			log.Tracef("unable to get parent pom from Maven central: %v", err)
			return []string{}
		}
		parentLicenses := parseLicensesFromPom(parentPom)
		if len(parentLicenses) > 0 || parentPom == nil || parentPom.Parent == nil {
			licenses = parentLicenses
			break
		}

		// The parent coordinates are optional pointers; a pom that omits any of them
		// cannot be resolved to a URL, so stop instead of dereferencing nil.
		parent := parentPom.Parent
		if parent.GroupID == nil || parent.ArtifactID == nil || parent.Version == nil {
			log.Tracef("parent of pom %s:%s:%s has incomplete coordinates, stopping license search", groupID, artifactID, version)
			break
		}

		groupID = *parent.GroupID
		artifactID = *parent.ArtifactID
		version = *parent.Version
	}

	return licenses
}
// getPomFromMavenRepo downloads and decodes the pom for the given coordinates from the
// Maven repository rooted at mavenBaseURL.
//
// The request uses a 10 second client timeout. An error is returned when the URL cannot
// be built, the request fails, the server answers with a status other than 200 OK, the
// body cannot be read, or the body cannot be decoded as a pom.
func getPomFromMavenRepo(groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
	requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
	if err != nil {
		return nil, err
	}
	log.Tracef("trying to fetch parent pom from Maven central %s", requestURL)

	mavenRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("unable to format request for Maven central: %w", err)
	}

	httpClient := &http.Client{
		Timeout: time.Second * 10,
	}

	resp, err := httpClient.Do(mavenRequest)
	if err != nil {
		return nil, fmt.Errorf("unable to get pom from Maven central: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	// A missing artifact is answered with an error page rather than a pom; reject it
	// here so it is never handed to the XML decoder.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unable to get pom from Maven central: unexpected status %q for %s", resp.Status, requestURL)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read pom from Maven central: %w", err)
	}

	pom, err := decodePomXML(strings.NewReader(string(body)))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pom from Maven central: %w", err)
	}

	return &pom, nil
}
// parseLicensesFromPom collects one string per license entry declared in pom: the
// license name when it is set, otherwise the license URL. Entries with neither are
// skipped. It returns nil when pom is nil, declares no licenses section, or yields
// no usable entries.
func parseLicensesFromPom(pom *gopom.Project) []string {
	if pom == nil || pom.Licenses == nil {
		return nil
	}

	var found []string
	for _, entry := range *pom.Licenses {
		switch {
		case entry.Name != nil:
			found = append(found, *entry.Name)
		case entry.URL != nil:
			found = append(found, *entry.URL)
		}
	}
	return found
}

View File

@ -24,7 +24,7 @@ const pomXMLGlob = "*pom.xml"
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]") var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (gap genericArchiveParserAdapter) parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pom, err := decodePomXML(reader) pom, err := decodePomXML(reader)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -36,6 +36,7 @@ func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationR
p := newPackageFromPom( p := newPackageFromPom(
pom, pom,
dep, dep,
gap.cfg,
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
) )
if p.Name == "" { if p.Name == "" {
@ -97,7 +98,7 @@ func newPomProject(path string, p gopom.Project, location file.Location) *parsed
} }
} }
func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package { func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package {
m := pkg.JavaArchive{ m := pkg.JavaArchive{
PomProperties: &pkg.JavaPomProperties{ PomProperties: &pkg.JavaPomProperties{
GroupID: resolveProperty(pom, dep.GroupID, "groupId"), GroupID: resolveProperty(pom, dep.GroupID, "groupId"),
@ -109,10 +110,26 @@ func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...fil
name := safeString(dep.ArtifactID) name := safeString(dep.ArtifactID)
version := resolveProperty(pom, dep.Version, "version") version := resolveProperty(pom, dep.Version, "version")
licenses := make([]pkg.License, 0)
if version != "" && cfg.UseNetwork {
parentLicenses := recursivelyFindLicensesFromParentPom(
m.PomProperties.GroupID,
m.PomProperties.ArtifactID,
version,
cfg)
if len(parentLicenses) > 0 {
for _, licenseName := range parentLicenses {
licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil))
}
}
}
p := pkg.Package{ p := pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
Licenses: pkg.NewLicenseSet(licenses...),
PURL: packageURL(name, version, m), PURL: packageURL(name, version, m),
Language: pkg.Java, Language: pkg.Java,
Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?

View File

@ -11,6 +11,7 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/vifraa/gopom" "github.com/vifraa/gopom"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -61,7 +62,15 @@ func Test_parserPomXML(t *testing.T) {
for i := range test.expected { for i := range test.expected {
test.expected[i].Locations.Add(file.NewLocation(test.input)) test.expected[i].Locations.Add(file.NewLocation(test.input))
} }
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
})
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
}) })
} }
} }
@ -276,7 +285,14 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
for i := range test.expected { for i := range test.expected {
test.expected[i].Locations.Add(file.NewLocation(test.input)) test.expected[i].Locations.Add(file.NewLocation(test.input))
} }
pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: true,
},
})
pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
}) })
} }
} }