diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index c7e0e4f6d..787859af0 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -179,7 +179,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { WithMavenLocalRepositoryDir(cfg.Java.MavenLocalRepositoryDir). WithUseNetwork(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseNetwork)). WithMavenBaseURL(cfg.Java.MavenURL). - WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth), + WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth). + WithResolveTransitiveDependencies(cfg.Java.ResolveTransitiveDependencies), } } diff --git a/cmd/syft/internal/options/java.go b/cmd/syft/internal/options/java.go index 6244de44f..80849f242 100644 --- a/cmd/syft/internal/options/java.go +++ b/cmd/syft/internal/options/java.go @@ -6,22 +6,24 @@ import ( ) type javaConfig struct { - UseNetwork *bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"` - UseMavenLocalRepository *bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"` - MavenLocalRepositoryDir string `yaml:"maven-local-repository-dir" json:"maven-local-repository-dir" mapstructure:"maven-local-repository-dir"` - MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"` - MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` + UseNetwork *bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"` + UseMavenLocalRepository *bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"` + MavenLocalRepositoryDir string `yaml:"maven-local-repository-dir" json:"maven-local-repository-dir" mapstructure:"maven-local-repository-dir"` + MavenURL string `yaml:"maven-url" 
json:"maven-url" mapstructure:"maven-url"` + MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` + ResolveTransitiveDependencies bool `yaml:"resolve-transitive-dependencies" json:"resolve-transitive-dependencies" mapstructure:"resolve-transitive-dependencies"` } func defaultJavaConfig() javaConfig { def := java.DefaultArchiveCatalogerConfig() return javaConfig{ - UseNetwork: nil, // this defaults to false, which is the API default - MaxParentRecursiveDepth: def.MaxParentRecursiveDepth, - UseMavenLocalRepository: nil, // this defaults to false, which is the API default - MavenLocalRepositoryDir: def.MavenLocalRepositoryDir, - MavenURL: def.MavenBaseURL, + UseNetwork: nil, // this defaults to false, which is the API default + MaxParentRecursiveDepth: def.MaxParentRecursiveDepth, + UseMavenLocalRepository: nil, // this defaults to false, which is the API default + MavenLocalRepositoryDir: def.MavenLocalRepositoryDir, + MavenURL: def.MavenBaseURL, + ResolveTransitiveDependencies: def.ResolveTransitiveDependencies, } } @@ -43,4 +45,5 @@ TIP: If you want to download all required pom files to the local repository with build, run 'mvn help:effective-pom' before performing the scan with syft.`) descriptions.Add(&o.MavenLocalRepositoryDir, `override the default location of the local Maven repository. 
the default is the subdirectory '.m2/repository' in your home directory`) + descriptions.Add(&o.ResolveTransitiveDependencies, `resolve transitive dependencies such as those defined in a dependency's POM on Maven central`) } diff --git a/cmd/syft/internal/test/integration/catalog_packages_cases_test.go b/cmd/syft/internal/test/integration/catalog_packages_cases_test.go index a8ea39d09..d9da923be 100644 --- a/cmd/syft/internal/test/integration/catalog_packages_cases_test.go +++ b/cmd/syft/internal/test/integration/catalog_packages_cases_test.go @@ -265,7 +265,7 @@ var dirOnlyTestCases = []testCase{ name: "find java packages including pom.xml", // directory scans can include packages that have yet to be installed pkgType: pkg.JavaPkg, pkgLanguage: pkg.Java, - duplicates: 1, // joda-time is included in both pom.xml AND the .jar collection + duplicates: 2, // joda-time and example-java-app-maven are included in both pom.xml AND the .jar collection pkgInfo: map[string]string{ "example-java-app-maven": "0.1.0", "joda-time": "2.9.2", diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index d084ac375..e0ed965b2 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -10,7 +10,6 @@ import ( "slices" "strings" - "github.com/vifraa/gopom" "golang.org/x/exp/maps" "github.com/anchore/syft/internal" @@ -22,6 +21,7 @@ import ( "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" ) var archiveFormatGlobs = []string{ @@ -57,7 +57,7 @@ type archiveParser struct { fileInfo archiveFilename detectNested bool cfg ArchiveCatalogerConfig - maven *mavenResolver + maven *maven.Resolver licenseScanner licenses.Scanner } @@ -69,15 +69,20 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa return genericArchiveParserAdapter{cfg: 
cfg} } -// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. +// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + return gap.processJavaArchive(ctx, reader, nil) +} + +// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives +func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg) // note: even on error, we should always run cleanup functions defer cleanupFn() if err != nil { return nil, nil, err } - return parser.parse(ctx) + return parser.parse(ctx, parentPkg) } // uniquePkgKey creates a unique string to identify the given package. @@ -115,34 +120,62 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d fileInfo: newJavaArchiveFilename(currentFilepath), detectNested: detectNested, cfg: cfg, - maven: newMavenResolver(nil, cfg), + maven: maven.NewResolver(nil, cfg.mavenConfig()), licenseScanner: licenseScanner, }, cleanupFn, nil } // parse the loaded archive and return all packages found. 
-func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Relationship, error) { +func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { var pkgs []pkg.Package var relationships []artifact.Relationship // find the parent package from the java manifest - parentPkg, err := j.discoverMainPackage(ctx) + mainPkg, err := j.discoverMainPackage(ctx) if err != nil { return nil, nil, fmt.Errorf("could not generate package from %s: %w", j.location, err) } // find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg // NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml - auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, parentPkg) + // IMPORTANT!: discoverPkgsFromAllMavenFiles may change mainPkg information, so needs to be called before SetID and before copying for relationships, etc. + auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, mainPkg) if err != nil { return nil, nil, err } - pkgs = append(pkgs, auxPkgs...) 
+ + if mainPkg != nil { + finalizePackage(mainPkg) + pkgs = append(pkgs, *mainPkg) + + if parentPkg != nil { + relationships = append(relationships, artifact.Relationship{ + From: *mainPkg, + To: *parentPkg, + Type: artifact.DependencyOfRelationship, + }) + } + } + + for i := range auxPkgs { + auxPkg := &auxPkgs[i] + + finalizePackage(auxPkg) + pkgs = append(pkgs, *auxPkg) + + if mainPkg != nil { + relationships = append(relationships, artifact.Relationship{ + From: *auxPkg, + To: *mainPkg, + Type: artifact.DependencyOfRelationship, + }) + } + } var errs error if j.detectNested { // find nested java archive packages - nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, parentPkg) + nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, mainPkg) if err != nil { errs = unknown.Append(errs, j.location, err) } @@ -157,29 +190,6 @@ func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Re } } - // lastly, add the parent package to the list (assuming the parent exists) - if parentPkg != nil { - pkgs = append([]pkg.Package{*parentPkg}, pkgs...) - } - - // add pURLs to all packages found - // note: since package information may change after initial creation when parsing multiple locations within the - // jar, we wait until the conclusion of the parsing process before synthesizing pURLs. 
- for i := range pkgs { - p := &pkgs[i] - if m, ok := p.Metadata.(pkg.JavaArchive); ok { - p.PURL = packageURL(p.Name, p.Version, m) - - if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") { - p.Type = pkg.JenkinsPluginPkg - } - } else { - log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl") - } - - p.SetID() - } - if len(pkgs) == 0 { errs = unknown.Appendf(errs, j.location, "no package identified in archive") } @@ -187,6 +197,22 @@ func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Re return pkgs, relationships, errs } +// finalizePackage potentially updates some package information such as classifying the package as a Jenkins plugin, +// sets the PURL, and calls p.SetID() +func finalizePackage(p *pkg.Package) { + if m, ok := p.Metadata.(pkg.JavaArchive); ok { + p.PURL = packageURL(p.Name, p.Version, m) + + if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") { + p.Type = pkg.JenkinsPluginPkg + } + } else { + log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl") + } + + p.SetID() +} + // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. 
func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package, error) { // search and parse java manifest files @@ -297,18 +323,18 @@ func (j *archiveParser) findLicenseFromJavaMetadata(ctx context.Context, groupID } var err error - var pomLicenses []gopom.License + var pomLicenses []maven.License if parsedPom != nil { - pomLicenses, err = j.maven.resolveLicenses(ctx, parsedPom.project) + pomLicenses, err = j.maven.ResolveLicenses(ctx, parsedPom.project) if err != nil { - log.WithFields("error", err, "mavenID", j.maven.getMavenID(ctx, parsedPom.project)).Trace("error attempting to resolve pom licenses") + log.WithFields("error", err, "mavenID", j.maven.ResolveID(ctx, parsedPom.project)).Trace("error attempting to resolve pom licenses") } } if err == nil && len(pomLicenses) == 0 { - pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version) + pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version) if err != nil { - log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Trace("error attempting to find licenses") + log.WithFields("error", err, "mavenID", maven.NewID(groupID, artifactID, version)).Trace("error attempting to find licenses") } } @@ -316,26 +342,37 @@ func (j *archiveParser) findLicenseFromJavaMetadata(ctx context.Context, groupID // Try removing the last part of the groupId, as sometimes it duplicates the artifactId packages := strings.Split(groupID, ".") groupID = strings.Join(packages[:len(packages)-1], ".") - pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version) + pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version) if err != nil { - log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Trace("error attempting to find sub-group licenses") + log.WithFields("error", err, "mavenID", maven.NewID(groupID, artifactID, version)).Trace("error attempting to find sub-group licenses") } } return 
toPkgLicenses(&j.location, pomLicenses) } -func toPkgLicenses(location *file.Location, licenses []gopom.License) []pkg.License { +func toPkgLicenses(location *file.Location, licenses []maven.License) []pkg.License { var out []pkg.License for _, license := range licenses { - out = append(out, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), location)) + name := "" + if license.Name != nil { + name = *license.Name + } + url := "" + if license.URL != nil { + url = *license.URL + } + if name == "" && url == "" { + continue + } + out = append(out, pkg.NewLicenseFromFields(name, url, location)) } return out } type parsedPomProject struct { path string - project *gopom.Project + project *maven.Project } // discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information @@ -370,7 +407,7 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro version = pomProperties.Version if parsedPom != nil && parsedPom.project != nil { - id := j.maven.getMavenID(ctx, parsedPom.project) + id := j.maven.ResolveID(ctx, parsedPom.project) if group == "" { group = id.GroupID } @@ -507,7 +544,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener var relationships []artifact.Relationship for pathWithinArchive, archiveOpener := range openers { - nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg) + nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg, parentPkg) if err != nil { log.WithFields("location", location.Path()).Warnf("unable to discover java packages from opener: %+v", err) continue @@ -531,7 +568,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener } // discoverPkgsFromOpener finds Java archives within the given file. 
-func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { +func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { archiveReadCloser, err := archiveOpener.Open() if err != nil { return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) @@ -546,10 +583,10 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit nestedLocation := file.NewLocationFromCoordinates(location.Coordinates) nestedLocation.AccessPath = nestedPath gap := newGenericArchiveParserAdapter(cfg) - nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, nil, nil, file.LocationReadCloser{ + nestedPkgs, nestedRelationships, err := gap.processJavaArchive(ctx, file.LocationReadCloser{ Location: nestedLocation, ReadCloser: archiveReadCloser, - }) + }, parentPkg) if err != nil { return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err) } @@ -595,7 +632,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP projectByParentPath := make(map[string]*parsedPomProject) for filePath, fileContents := range contentsOfMavenProjectFiles { // TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar - pom, err := decodePomXML(strings.NewReader(fileContents)) + pom, err := maven.ParsePomXML(strings.NewReader(fileContents)) if err != nil { log.WithFields("contents-path", filePath, "location", location.Path()).Warnf("failed to parse pom.xml: %+v", err) continue @@ -614,7 +651,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP // newPackageFromMavenData 
processes a single Maven POM properties for a given parent package, returning all listed Java packages found and // associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not. -func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package { +func newPackageFromMavenData(ctx context.Context, r *maven.Resolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package { // keep the artifact name within the virtual path if this package does not match the parent package vPathSuffix := "" groupID := "" @@ -639,23 +676,20 @@ func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomPropertie var pkgPomProject *pkg.JavaPomProject var err error - var pomLicenses []gopom.License + var pomLicenses []maven.License if parsedPom == nil { // If we have no pom.xml, check maven central using pom.properties - pomLicenses, err = r.findLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) + pomLicenses, err = r.FindLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) } else { pkgPomProject = newPomProject(ctx, r, parsedPom.path, parsedPom.project) - pomLicenses, err = r.resolveLicenses(ctx, parsedPom.project) + pomLicenses, err = r.ResolveLicenses(ctx, parsedPom.project) } if err != nil { - log.WithFields("error", err, "mavenID", mavenID{pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version}).Trace("error attempting to resolve licenses") + log.WithFields("error", err, "mavenID", maven.NewID(pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)).Trace("error attempting to resolve licenses") } - licenses := make([]pkg.License, 0) - for _, license := range pomLicenses { - licenses = append(licenses, 
pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), &location)) - } + licenseSet := pkg.NewLicenseSet(toPkgLicenses(&location, pomLicenses)...) p := pkg.Package{ Name: pomProperties.ArtifactID, @@ -663,7 +697,7 @@ func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomPropertie Locations: file.NewLocationSet( location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), ), - Licenses: pkg.NewLicenseSet(licenses...), + Licenses: licenseSet, Language: pkg.Java, Type: pomProperties.PkgTypeIndicated(), Metadata: pkg.JavaArchive{ diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index b0aad4faf..b1779bbf1 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -18,7 +18,6 @@ import ( "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" "github.com/anchore/syft/internal/licenses" "github.com/anchore/syft/syft/artifact" @@ -26,10 +25,12 @@ import ( "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" ) func TestSearchMavenForLicenses(t *testing.T) { - url := mockMavenRepo(t) + url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") ctx := licenses.SetContextLicenseScanner(context.Background(), licenses.TestingOnlyScanner()) @@ -83,8 +84,8 @@ func TestSearchMavenForLicenses(t *testing.T) { // assert licenses are discovered from upstream _, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(context.Background()) - licenses, _ := ap.maven.resolveLicenses(context.Background(), parsedPom.project) - assert.Equal(t, tc.expectedLicenses, toPkgLicenses(nil, licenses)) + 
resolvedLicenses, _ := ap.maven.ResolveLicenses(context.Background(), parsedPom.project) + assert.Equal(t, tc.expectedLicenses, toPkgLicenses(nil, resolvedLicenses)) }) } } @@ -362,9 +363,11 @@ func TestParseJar(t *testing.T) { defer cleanupFn() require.NoError(t, err) - actual, _, err := parser.parse(context.Background()) + actual, _, err := parser.parse(context.Background(), nil) if test.wantErr != nil { test.wantErr(t, err) + } else { + require.NoError(t, err) } if len(actual) != len(test.expected) { @@ -635,10 +638,10 @@ func TestParseNestedJar(t *testing.T) { require.NoError(t, err) gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{}) - actual, _, err := gap.parseJavaArchive(context.Background(), nil, nil, file.LocationReadCloser{ + actual, _, err := gap.processJavaArchive(context.Background(), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, - }) + }, nil) require.NoError(t, err) expectedNameVersionPairSet := strset.New() @@ -776,8 +779,8 @@ func Test_newPackageFromMavenData(t *testing.T) { Version: "1.0", }, project: &parsedPomProject{ - project: &gopom.Project{ - Parent: &gopom.Parent{ + project: &maven.Project{ + Parent: &maven.Parent{ GroupID: ptr("some-parent-group-id"), ArtifactID: ptr("some-parent-artifact-id"), Version: ptr("1.0-parent"), @@ -788,7 +791,7 @@ func Test_newPackageFromMavenData(t *testing.T) { Version: ptr("1.0"), Description: ptr("desc"), URL: ptr("aweso.me"), - Licenses: &[]gopom.License{ + Licenses: &[]maven.License{ { Name: ptr("MIT"), URL: ptr("https://opensource.org/licenses/MIT"), @@ -1052,7 +1055,7 @@ func Test_newPackageFromMavenData(t *testing.T) { } test.expectedParent.Locations = locations - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) + r := maven.NewResolver(nil, maven.DefaultConfig()) actualPackage := newPackageFromMavenData(context.Background(), r, test.props, test.project, test.parent, file.NewLocation(virtualPath)) if test.expectedPackage == nil { 
require.Nil(t, actualPackage) @@ -1093,6 +1096,76 @@ func Test_artifactIDMatchesFilename(t *testing.T) { } func Test_parseJavaArchive_regressions(t *testing.T) { + apiAll := pkg.Package{ + Name: "api-all", + Version: "2.0.0", + Type: pkg.JavaPkg, + Language: pkg.Java, + PURL: "pkg:maven/org.apache.directory.api/api-all@2.0.0", + Locations: file.NewLocationSet(file.NewLocation("test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar")), + Metadata: pkg.JavaArchive{ + VirtualPath: "test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar", + Manifest: &pkg.JavaManifest{ + Main: []pkg.KeyValue{ + { + Key: "Manifest-Version", + Value: "1.0", + }, + { + Key: "Built-By", + Value: "elecharny", + }, + { + Key: "Created-By", + Value: "Apache Maven 3.6.0", + }, + { + Key: "Build-Jdk", + Value: "1.8.0_191", + }, + }, + }, + PomProperties: &pkg.JavaPomProperties{ + Path: "META-INF/maven/org.apache.directory.api/api-all/pom.properties", + GroupID: "org.apache.directory.api", + ArtifactID: "api-all", + Version: "2.0.0", + }, + }, + } + + apiAsn1Api := pkg.Package{ + Name: "api-asn1-api", + Version: "2.0.0", + PURL: "pkg:maven/org.apache.directory.api/api-asn1-api@2.0.0", + Locations: file.NewLocationSet(file.NewLocation("test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar")), + Type: pkg.JavaPkg, + Language: pkg.Java, + Metadata: pkg.JavaArchive{ + VirtualPath: "test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar:org.apache.directory.api:api-asn1-api", + PomProperties: &pkg.JavaPomProperties{ + Path: "META-INF/maven/org.apache.directory.api/api-asn1-api/pom.properties", + GroupID: "org.apache.directory.api", + ArtifactID: "api-asn1-api", + Version: "2.0.0", + }, + PomProject: &pkg.JavaPomProject{ + Path: "META-INF/maven/org.apache.directory.api/api-asn1-api/pom.xml", + ArtifactID: "api-asn1-api", + GroupID: "org.apache.directory.api", + Version: "2.0.0", + Name: "Apache Directory API ASN.1 API", + Description: "ASN.1 API", + Parent: &pkg.JavaPomParent{ + 
GroupID: "org.apache.directory.api", + ArtifactID: "api-asn1-parent", + Version: "2.0.0", + }, + }, + Parent: &apiAll, + }, + } + tests := []struct { name string fixtureName string @@ -1214,73 +1287,14 @@ func Test_parseJavaArchive_regressions(t *testing.T) { fixtureName: "api-all-2.0.0-sources", assignParent: true, expectedPkgs: []pkg.Package{ + apiAll, + apiAsn1Api, + }, + expectedRelationships: []artifact.Relationship{ { - Name: "api-all", - Version: "2.0.0", - Type: pkg.JavaPkg, - Language: pkg.Java, - PURL: "pkg:maven/org.apache.directory.api/api-all@2.0.0", - Locations: file.NewLocationSet(file.NewLocation("test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar")), - Metadata: pkg.JavaArchive{ - VirtualPath: "test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar", - Manifest: &pkg.JavaManifest{ - Main: []pkg.KeyValue{ - { - Key: "Manifest-Version", - Value: "1.0", - }, - { - Key: "Built-By", - Value: "elecharny", - }, - { - Key: "Created-By", - Value: "Apache Maven 3.6.0", - }, - { - Key: "Build-Jdk", - Value: "1.8.0_191", - }, - }, - }, - PomProperties: &pkg.JavaPomProperties{ - Path: "META-INF/maven/org.apache.directory.api/api-all/pom.properties", - GroupID: "org.apache.directory.api", - ArtifactID: "api-all", - Version: "2.0.0", - }, - }, - }, - { - Name: "api-asn1-api", - Version: "2.0.0", - PURL: "pkg:maven/org.apache.directory.api/api-asn1-api@2.0.0", - Locations: file.NewLocationSet(file.NewLocation("test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar")), - Type: pkg.JavaPkg, - Language: pkg.Java, - Metadata: pkg.JavaArchive{ - VirtualPath: "test-fixtures/jar-metadata/cache/api-all-2.0.0-sources.jar:org.apache.directory.api:api-asn1-api", - PomProperties: &pkg.JavaPomProperties{ - Path: "META-INF/maven/org.apache.directory.api/api-asn1-api/pom.properties", - GroupID: "org.apache.directory.api", - ArtifactID: "api-asn1-api", - Version: "2.0.0", - }, - PomProject: &pkg.JavaPomProject{ - Path: 
"META-INF/maven/org.apache.directory.api/api-asn1-api/pom.xml", - ArtifactID: "api-asn1-api", - GroupID: "org.apache.directory.api", - Version: "2.0.0", - Name: "Apache Directory API ASN.1 API", - Description: "ASN.1 API", - Parent: &pkg.JavaPomParent{ - GroupID: "org.apache.directory.api", - ArtifactID: "api-asn1-parent", - Version: "2.0.0", - }, - }, - Parent: nil, - }, + From: apiAsn1Api, + To: apiAll, + Type: artifact.DependencyOfRelationship, }, }, }, @@ -1364,11 +1378,11 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { tests := []struct { fixture string - expected mavenID + expected maven.ID }{ { fixture: "multiple-matching-2.11.5", - expected: mavenID{"org.multiple", "multiple-matching-1", "2.11.5"}, + expected: maven.NewID("org.multiple", "multiple-matching-1", "2.11.5"), }, } @@ -1391,7 +1405,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { require.NoError(t, err) groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(context.TODO()) - require.Equal(t, test.expected, mavenID{groupID, artifactID, version}) + require.Equal(t, test.expected, maven.NewID(groupID, artifactID, version)) }() } }) @@ -1401,10 +1415,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { func assignParent(parent *pkg.Package, childPackages ...pkg.Package) { for i, jp := range childPackages { if v, ok := jp.Metadata.(pkg.JavaArchive); ok { - parent := *parent - // PURL are not calculated after the fact for parent - parent.PURL = "" - v.Parent = &parent + v.Parent = parent childPackages[i].Metadata = v } } diff --git a/syft/pkg/cataloger/java/config.go b/syft/pkg/cataloger/java/config.go index 29096d59b..ed85a308b 100644 --- a/syft/pkg/cataloger/java/config.go +++ b/syft/pkg/cataloger/java/config.go @@ -1,8 +1,11 @@ package java -import "github.com/anchore/syft/syft/cataloging" +import ( + "strings" -const mavenBaseURL = "https://repo1.maven.org/maven2" + "github.com/anchore/syft/syft/cataloging" + 
"github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" +) type ArchiveCatalogerConfig struct { cataloging.ArchiveSearchConfig `yaml:",inline" json:"" mapstructure:",squash"` @@ -11,16 +14,19 @@ type ArchiveCatalogerConfig struct { MavenLocalRepositoryDir string `yaml:"maven-localrepository-dir" json:"maven-localrepository-dir" mapstructure:"maven-localrepository-dir"` MavenBaseURL string `yaml:"maven-base-url" json:"maven-base-url" mapstructure:"maven-base-url"` MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` + ResolveTransitiveDependencies bool `yaml:"resolve-transitive-dependencies" json:"resolve-transitive-dependencies" mapstructure:"resolve-transitive-dependencies"` } func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig { + mavenCfg := maven.DefaultConfig() return ArchiveCatalogerConfig{ - ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(), - UseNetwork: false, - UseMavenLocalRepository: false, - MavenLocalRepositoryDir: defaultMavenLocalRepoDir(), - MavenBaseURL: mavenBaseURL, - MaxParentRecursiveDepth: 0, // unlimited + ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(), + UseNetwork: mavenCfg.UseNetwork, + UseMavenLocalRepository: mavenCfg.UseLocalRepository, + MavenLocalRepositoryDir: mavenCfg.LocalRepositoryDir, + MavenBaseURL: strings.Join(mavenCfg.Repositories, ","), + MaxParentRecursiveDepth: mavenCfg.MaxParentRecursiveDepth, + ResolveTransitiveDependencies: false, } } @@ -46,8 +52,23 @@ func (j ArchiveCatalogerConfig) WithMavenBaseURL(input string) ArchiveCatalogerC return j } +func (j ArchiveCatalogerConfig) WithResolveTransitiveDependencies(resolveTransitiveDependencies bool) ArchiveCatalogerConfig { + j.ResolveTransitiveDependencies = resolveTransitiveDependencies + return j +} + func (j ArchiveCatalogerConfig) WithArchiveTraversal(search cataloging.ArchiveSearchConfig, maxDepth int) ArchiveCatalogerConfig { 
j.MaxParentRecursiveDepth = maxDepth j.ArchiveSearchConfig = search return j } + +func (j ArchiveCatalogerConfig) mavenConfig() maven.Config { + return maven.Config{ + UseNetwork: j.UseNetwork, + UseLocalRepository: j.UseMavenLocalRepository, + LocalRepositoryDir: j.MavenLocalRepositoryDir, + Repositories: strings.Split(j.MavenBaseURL, ","), + MaxParentRecursiveDepth: j.MaxParentRecursiveDepth, + } +} diff --git a/syft/pkg/cataloger/java/maven_utils.go b/syft/pkg/cataloger/java/internal/maven/config.go similarity index 56% rename from syft/pkg/cataloger/java/maven_utils.go rename to syft/pkg/cataloger/java/internal/maven/config.go index 9d365e151..1bdcb7119 100644 --- a/syft/pkg/cataloger/java/maven_utils.go +++ b/syft/pkg/cataloger/java/internal/maven/config.go @@ -1,4 +1,4 @@ -package java +package maven import ( "encoding/xml" @@ -15,6 +15,35 @@ import ( "github.com/anchore/syft/internal/log" ) +const mavenBaseURL = "https://repo1.maven.org/maven2" + +type Config struct { + // UseNetwork instructs the maven resolver to use network operations to resolve maven artifacts + UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"` + + // Repositories are the set of remote repositories the network resolution should use + Repositories []string `yaml:"maven-repositories" json:"maven-repositories" mapstructure:"maven-repositories"` + + // UseLocalRepository instructs the maven resolver to look in the host maven cache, usually ~/.m2/repository + UseLocalRepository bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"` + + // LocalRepositoryDir is an alternate directory to use to look up the local repository + LocalRepositoryDir string `yaml:"maven-local-repository-dir" json:"maven-local-repository-dir" mapstructure:"maven-local-repository-dir"` + + // MaxParentRecursiveDepth allows for a maximum depth to use when recursively resolving parent poms and other information, 0 disables any 
maximum + MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` +} + +func DefaultConfig() Config { + return Config{ + UseNetwork: false, + Repositories: []string{mavenBaseURL}, + UseLocalRepository: false, + LocalRepositoryDir: defaultMavenLocalRepoDir(), + MaxParentRecursiveDepth: 0, // unlimited + } +} + // defaultMavenLocalRepoDir gets default location of the Maven local repository, generally at <USER HOME DIR>/.m2/repository func defaultMavenLocalRepoDir() string { homeDir, err := homedir.Dir() @@ -49,15 +78,6 @@ func getSettingsXMLLocalRepository(settingsXML io.Reader) string { return s.LocalRepository } -// deref dereferences ptr if not nil, or returns the type default value if ptr is nil -func deref[T any](ptr *T) T { - if ptr == nil { - var t T - return t - } - return *ptr -} - // remotePomURL returns a URL to download a POM from a remote repository func remotePomURL(repoURL, groupID, artifactID, version string) (requestURL string, err error) { // groupID needs to go from maven.org -> maven/org diff --git a/syft/pkg/cataloger/java/maven_utils_test.go b/syft/pkg/cataloger/java/internal/maven/config_test.go similarity index 93% rename from syft/pkg/cataloger/java/maven_utils_test.go rename to syft/pkg/cataloger/java/internal/maven/config_test.go index 8d599b501..46e7341e7 100644 --- a/syft/pkg/cataloger/java/maven_utils_test.go +++ b/syft/pkg/cataloger/java/internal/maven/config_test.go @@ -1,4 +1,4 @@ -package java +package maven import ( "os" @@ -7,6 +7,8 @@ import ( "github.com/mitchellh/go-homedir" "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal" ) func Test_defaultMavenLocalRepoDir(t *testing.T) { @@ -69,7 +71,7 @@ func Test_getSettingsXmlLocalRepository(t *testing.T) { for _, test := range tests { t.Run(test.expected, func(t *testing.T) { f, _ := os.Open(test.file) - defer f.Close() + defer internal.CloseAndLogError(f, test.file) got := 
getSettingsXMLLocalRepository(f) require.Equal(t, test.expected, got) }) @@ -85,7 +87,7 @@ func Test_remotePomURL(t *testing.T) { expected string }{ { - name: "formatMavenURL correctly assembles the pom URL", + name: "remotePomURL correctly assembles the pom URL", groupID: "org.springframework.boot", artifactID: "spring-boot-starter-test", version: "3.1.5", diff --git a/syft/pkg/cataloger/java/internal/maven/pom_parser.go b/syft/pkg/cataloger/java/internal/maven/pom_parser.go new file mode 100644 index 000000000..ed0637e1c --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/pom_parser.go @@ -0,0 +1,67 @@ +package maven + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "strings" + + "github.com/saintfish/chardet" + "github.com/vifraa/gopom" + "golang.org/x/net/html/charset" +) + +type ( + Project = gopom.Project + Properties = gopom.Properties + Parent = gopom.Parent + Dependency = gopom.Dependency + License = gopom.License +) + +// ParsePomXML decodes a pom XML file, detecting and converting non-UTF-8 charsets. this DOES NOT perform any logic to resolve properties such as groupID, artifactID, and version +func ParsePomXML(content io.Reader) (project *Project, err error) { + inputReader, err := getUtf8Reader(content) + if err != nil { + return nil, fmt.Errorf("unable to read pom.xml: %w", err) + } + + decoder := xml.NewDecoder(inputReader) + // when an xml file has a character set declaration (e.g. 
'<?xml version="1.0" encoding="ISO-8859-1"?>') read that and use the correct decoder + decoder.CharsetReader = charset.NewReaderLabel + + project = &Project{} + if err := decoder.Decode(project); err != nil { + return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err) + } + + return project, nil +} + +func getUtf8Reader(content io.Reader) (io.Reader, error) { + pomContents, err := io.ReadAll(content) + if err != nil { + return nil, err + } + + detector := chardet.NewTextDetector() + detection, err := detector.DetectBest(pomContents) + + var inputReader io.Reader + if err == nil && detection != nil { + if detection.Charset == "UTF-8" { + inputReader = bytes.NewReader(pomContents) + } else { + inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) + if err != nil { + return nil, fmt.Errorf("unable to get encoding: %w", err) + } + } + } else { + // we could not detect the encoding, but we want a valid file to read. Replace unreadable + // characters with the UTF-8 replacement character. + inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) + } + return inputReader, nil +} diff --git a/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go b/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go new file mode 100644 index 000000000..c9f8463a4 --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go @@ -0,0 +1,93 @@ +package maven + +import ( + "encoding/base64" + "io" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal" +) + +func Test_getUtf8Reader(t *testing.T) { + tests := []struct { + name string + contents string + }{ + { + name: "unknown encoding", + // random binary contents + contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + decoder := 
base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) + + got, err := getUtf8Reader(decoder) + require.NoError(t, err) + gotBytes, err := io.ReadAll(got) + require.NoError(t, err) + // if we couldn't decode the section as UTF-8, we should get a replacement character + assert.Contains(t, string(gotBytes), "�") + }) + } +} + +func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { + // regression for https://github.com/anchore/syft/issues/2044 + + // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the + // file, which is extremely important for this test. + + // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically + // convert the file to UTF-8, which will break this test: + + // xxd with the original pom.xml + // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. + // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 <name>J.r.me Mi + // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc</name>.. + + // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) + // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. + // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d <name>J...r...m + // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc</name>.. + + // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save + // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character). + // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. + // This is quite silly on the part of IntelliJ, but it is what it is. 
+ + cases := []struct { + name string + fixture string + }{ + { + name: "undeclared encoding", + fixture: "test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64", + }, + { + name: "declared encoding", + fixture: "test-fixtures/declared-iso-8859-encoded-pom.xml.base64", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + fh, err := os.Open(c.fixture) + require.NoError(t, err) + defer internal.CloseAndLogError(fh, c.fixture) + + decoder := base64.NewDecoder(base64.StdEncoding, fh) + + proj, err := ParsePomXML(decoder) + + require.NoError(t, err) + require.NotEmpty(t, proj.Developers) + }) + } +} diff --git a/syft/pkg/cataloger/java/maven_resolver.go b/syft/pkg/cataloger/java/internal/maven/resolver.go similarity index 62% rename from syft/pkg/cataloger/java/maven_resolver.go rename to syft/pkg/cataloger/java/internal/maven/resolver.go index f9e373895..710600ae2 100644 --- a/syft/pkg/cataloger/java/maven_resolver.go +++ b/syft/pkg/cataloger/java/internal/maven/resolver.go @@ -1,4 +1,4 @@ -package java +package maven import ( "bytes" @@ -24,56 +24,69 @@ import ( "github.com/anchore/syft/syft/file" ) -// mavenID is the unique identifier for a package in Maven -type mavenID struct { +// ID is the unique identifier for a package in Maven +type ID struct { GroupID string ArtifactID string Version string } -func (m mavenID) String() string { +func NewID(groupID, artifactID, version string) ID { + return ID{ + GroupID: groupID, + ArtifactID: artifactID, + Version: version, + } +} + +func (m ID) String() string { return fmt.Sprintf("(groupId: %s artifactId: %s version: %s)", m.GroupID, m.ArtifactID, m.Version) } +// Valid indicates that the given maven ID has values for groupId, artifactId, and version +func (m ID) Valid() bool { + return m.GroupID != "" && m.ArtifactID != "" && m.Version != "" +} + var expressionMatcher = regexp.MustCompile("[$][{][^}]+[}]") -// mavenResolver is a short-lived utility to resolve maven poms from multiple sources, 
including: +// Resolver is a short-lived utility to resolve maven poms from multiple sources, including: // the scanned filesystem, local maven cache directories, remote maven repositories, and the syft cache -type mavenResolver struct { - cfg ArchiveCatalogerConfig +type Resolver struct { + cfg Config cache cache.Cache - resolved map[mavenID]*gopom.Project + resolved map[ID]*Project remoteRequestTimeout time.Duration checkedLocalRepo bool // fileResolver and pomLocations are used to resolve parent poms by relativePath fileResolver file.Resolver - pomLocations map[*gopom.Project]file.Location + pomLocations map[*Project]file.Location } -// newMavenResolver constructs a new mavenResolver with the given configuration. +// NewResolver constructs a new Resolver with the given configuration. // NOTE: the fileResolver is optional and if provided will be used to resolve parent poms by relative path -func newMavenResolver(fileResolver file.Resolver, cfg ArchiveCatalogerConfig) *mavenResolver { - return &mavenResolver{ +func NewResolver(fileResolver file.Resolver, cfg Config) *Resolver { + return &Resolver{ cfg: cfg, cache: cache.GetManager().GetCache("java/maven/repo", "v1"), - resolved: map[mavenID]*gopom.Project{}, + resolved: map[ID]*Project{}, remoteRequestTimeout: time.Second * 10, fileResolver: fileResolver, - pomLocations: map[*gopom.Project]file.Location{}, + pomLocations: map[*Project]file.Location{}, } } -// getPropertyValue gets property values by emulating maven property resolution logic, looking in the project's variables +// ResolveProperty gets property values by emulating maven property resolution logic, looking in the project's variables // as well as supporting the project expressions like ${project.parent.groupId}. 
// Properties which are not resolved result in empty string "" -func (r *mavenResolver) getPropertyValue(ctx context.Context, propertyValue *string, resolutionContext ...*gopom.Project) string { - return r.resolvePropertyValue(ctx, propertyValue, nil, resolutionContext...) +func (r *Resolver) ResolveProperty(ctx context.Context, pom *Project, propertyValue *string) string { + return r.resolvePropertyValue(ctx, propertyValue, nil, pom) } // resolvePropertyValue resolves property values by emulating maven property resolution logic, looking in the project's variables // as well as supporting the project expressions like ${project.parent.groupId}. // Properties which are not resolved result in empty string "" -func (r *mavenResolver) resolvePropertyValue(ctx context.Context, propertyValue *string, resolvingProperties []string, resolutionContext ...*gopom.Project) string { +func (r *Resolver) resolvePropertyValue(ctx context.Context, propertyValue *string, resolvingProperties []string, resolutionContext ...*Project) string { if propertyValue == nil { return "" } @@ -86,7 +99,7 @@ func (r *mavenResolver) resolvePropertyValue(ctx context.Context, propertyValue } // resolveExpression resolves an expression, which may be a plain string or a string with ${ property.references } -func (r *mavenResolver) resolveExpression(ctx context.Context, resolutionContext []*gopom.Project, expression string, resolvingProperties []string) (string, error) { +func (r *Resolver) resolveExpression(ctx context.Context, resolutionContext []*Project, expression string, resolvingProperties []string) (string, error) { log.Tracef("resolving expression: '%v' in context: %v", expression, resolutionContext) var errs error @@ -103,7 +116,7 @@ func (r *mavenResolver) resolveExpression(ctx context.Context, resolutionContext } // resolveProperty resolves properties recursively from the root project -func (r *mavenResolver) resolveProperty(ctx context.Context, resolutionContext []*gopom.Project, 
propertyExpression string, resolvingProperties []string) (string, error) { +func (r *Resolver) resolveProperty(ctx context.Context, resolutionContext []*Project, propertyExpression string, resolvingProperties []string) (string, error) { // prevent cycles if slices.Contains(resolvingProperties, propertyExpression) { return "", fmt.Errorf("cycle detected resolving: %s", propertyExpression) @@ -146,7 +159,7 @@ func (r *mavenResolver) resolveProperty(ctx context.Context, resolutionContext [ // resolveProjectProperty resolves properties on the project // //nolint:gocognit -func (r *mavenResolver) resolveProjectProperty(ctx context.Context, resolutionContext []*gopom.Project, pom *gopom.Project, propertyExpression string, resolving []string) (string, error) { +func (r *Resolver) resolveProjectProperty(ctx context.Context, resolutionContext []*Project, pom *Project, propertyExpression string, resolving []string) (string, error) { // see if we have a project.x expression and process this based // on the xml tags in gopom parts := strings.Split(propertyExpression, ".") @@ -210,19 +223,48 @@ func (r *mavenResolver) resolveProjectProperty(ctx context.Context, resolutionCo return "", nil } -// getMavenID creates a new mavenID from a pom, resolving parent information as necessary -func (r *mavenResolver) getMavenID(ctx context.Context, resolutionContext ...*gopom.Project) mavenID { - return r.resolveMavenID(ctx, nil, resolutionContext...) 
+// ResolveParent resolves the parent definition, and returns a POM for the parent, which is possibly incomplete, or nil +func (r *Resolver) ResolveParent(ctx context.Context, pom *Project) (*Project, error) { + if pom == nil || pom.Parent == nil { + return nil, nil + } + + parent, err := r.resolveParent(ctx, pom) + if parent != nil { + return parent, err + } + + groupID := r.ResolveProperty(ctx, pom, pom.Parent.GroupID) + if groupID == "" { + groupID = r.ResolveProperty(ctx, pom, pom.GroupID) + } + artifactID := r.ResolveProperty(ctx, pom, pom.Parent.ArtifactID) + version := r.ResolveProperty(ctx, pom, pom.Parent.Version) + + if artifactID != "" && version != "" { + return &Project{ + GroupID: &groupID, + ArtifactID: &artifactID, + Version: &version, + }, nil + } + + return nil, fmt.Errorf("unsufficient information to create a parent pom project, id: %s", NewID(groupID, artifactID, version)) } -// resolveMavenID creates a new mavenID from a pom, resolving parent information as necessary -func (r *mavenResolver) resolveMavenID(ctx context.Context, resolvingProperties []string, resolutionContext ...*gopom.Project) mavenID { +// ResolveID creates an ID from a pom, resolving parent information as necessary +func (r *Resolver) ResolveID(ctx context.Context, pom *Project) ID { + return r.resolveID(ctx, nil, pom) +} + +// resolveID creates a new ID from a pom, resolving parent information as necessary +func (r *Resolver) resolveID(ctx context.Context, resolvingProperties []string, resolutionContext ...*Project) ID { if len(resolutionContext) == 0 || resolutionContext[0] == nil { - return mavenID{} + return ID{} } pom := resolutionContext[len(resolutionContext)-1] // get topmost pom if pom == nil { - return mavenID{} + return ID{} } groupID := r.resolvePropertyValue(ctx, pom.GroupID, resolvingProperties, resolutionContext...) 
@@ -239,50 +281,50 @@ func (r *mavenResolver) resolveMavenID(ctx context.Context, resolvingProperties version = r.resolvePropertyValue(ctx, pom.Parent.Version, resolvingProperties, resolutionContext...) } } - return mavenID{groupID, artifactID, version} + return ID{groupID, artifactID, version} } -// resolveDependencyID creates a new mavenID from a dependency element in a pom, resolving information as necessary -func (r *mavenResolver) resolveDependencyID(ctx context.Context, pom *gopom.Project, dep gopom.Dependency) mavenID { +// ResolveDependencyID creates an ID from a dependency element in a pom, resolving information as necessary +func (r *Resolver) ResolveDependencyID(ctx context.Context, pom *Project, dep Dependency) ID { if pom == nil { - return mavenID{} + return ID{} } - groupID := r.getPropertyValue(ctx, dep.GroupID, pom) - artifactID := r.getPropertyValue(ctx, dep.ArtifactID, pom) - version := r.getPropertyValue(ctx, dep.Version, pom) + groupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, pom) + artifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, pom) + version := r.resolvePropertyValue(ctx, dep.Version, nil, pom) var err error if version == "" { - version, err = r.findInheritedVersion(ctx, pom, groupID, artifactID) + version, err = r.resolveInheritedVersion(ctx, pom, groupID, artifactID) } - depID := mavenID{groupID, artifactID, version} + depID := ID{groupID, artifactID, version} if err != nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", depID) + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", depID) } return depID } -// findPom gets a pom from cache, local repository, or from a remote Maven repository depending on configuration -func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, version string) (*gopom.Project, error) { +// FindPom gets a pom from cache, local repository, or from a remote Maven repository depending on configuration +func (r 
*Resolver) FindPom(ctx context.Context, groupID, artifactID, version string) (*Project, error) { if groupID == "" || artifactID == "" || version == "" { return nil, fmt.Errorf("invalid maven pom specification, require non-empty values for groupID: '%s', artifactID: '%s', version: '%s'", groupID, artifactID, version) } - id := mavenID{groupID, artifactID, version} - pom := r.resolved[id] + id := ID{groupID, artifactID, version} + existingPom := r.resolved[id] - if pom != nil { - return pom, nil + if existingPom != nil { + return existingPom, nil } var errs error // try to resolve first from local maven repo - if r.cfg.UseMavenLocalRepository { + if r.cfg.UseLocalRepository { pom, err := r.findPomInLocalRepository(groupID, artifactID, version) if pom != nil { r.resolved[id] = pom @@ -292,8 +334,8 @@ func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, versio } // resolve via network maven repository - if pom == nil && r.cfg.UseNetwork { - pom, err := r.findPomInRemoteRepository(ctx, groupID, artifactID, version) + if r.cfg.UseNetwork { + pom, err := r.findPomInRemotes(ctx, groupID, artifactID, version) if pom != nil { r.resolved[id] = pom return pom, nil @@ -305,35 +347,50 @@ func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, versio } // findPomInLocalRepository attempts to get the POM from the users local maven repository -func (r *mavenResolver) findPomInLocalRepository(groupID, artifactID, version string) (*gopom.Project, error) { +func (r *Resolver) findPomInLocalRepository(groupID, artifactID, version string) (*Project, error) { groupPath := filepath.Join(strings.Split(groupID, ".")...) 
- pomFilePath := filepath.Join(r.cfg.MavenLocalRepositoryDir, groupPath, artifactID, version, artifactID+"-"+version+".pom") + pomFilePath := filepath.Join(r.cfg.LocalRepositoryDir, groupPath, artifactID, version, artifactID+"-"+version+".pom") pomFile, err := os.Open(pomFilePath) if err != nil { if !r.checkedLocalRepo && errors.Is(err, os.ErrNotExist) { r.checkedLocalRepo = true // check if the directory exists at all, and if not just stop trying to resolve local maven files - fi, err := os.Stat(r.cfg.MavenLocalRepositoryDir) + fi, err := os.Stat(r.cfg.LocalRepositoryDir) if errors.Is(err, os.ErrNotExist) || !fi.IsDir() { - log.WithFields("error", err, "repositoryDir", r.cfg.MavenLocalRepositoryDir). + log.WithFields("error", err, "repositoryDir", r.cfg.LocalRepositoryDir). Info("local maven repository is not a readable directory, stopping local resolution") - r.cfg.UseMavenLocalRepository = false + r.cfg.UseLocalRepository = false } } return nil, err } defer internal.CloseAndLogError(pomFile, pomFilePath) - return decodePomXML(pomFile) + return ParsePomXML(pomFile) +} + +// findPomInRemotes downloads the pom file from all configured Maven repositories over HTTP +func (r *Resolver) findPomInRemotes(ctx context.Context, groupID, artifactID, version string) (*Project, error) { + var errs error + for _, repo := range r.cfg.Repositories { + pom, err := r.findPomInRemoteRepository(ctx, repo, groupID, artifactID, version) + if err != nil { + errs = errors.Join(errs, err) + } + if pom != nil { + return pom, err + } + } + return nil, fmt.Errorf("pom for %v not found in any remote repository: %w", ID{groupID, artifactID, version}, errs) } // findPomInRemoteRepository download the pom file from a (remote) Maven repository over HTTP -func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, artifactID, version string) (*gopom.Project, error) { +func (r *Resolver) findPomInRemoteRepository(ctx context.Context, repo string, groupID, artifactID, version 
string) (*Project, error) { if groupID == "" || artifactID == "" || version == "" { return nil, fmt.Errorf("missing/incomplete maven artifact coordinates -- groupId: '%s' artifactId: '%s', version: '%s'", groupID, artifactID, version) } - requestURL, err := remotePomURL(r.cfg.MavenBaseURL, groupID, artifactID, version) + requestURL, err := remotePomURL(repo, groupID, artifactID, version) if err != nil { return nil, fmt.Errorf("unable to find pom in remote due to: %w", err) } @@ -377,7 +434,7 @@ func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, if reader, ok := reader.(io.Closer); ok { defer internal.CloseAndLogError(reader, requestURL) } - pom, err := decodePomXML(reader) + pom, err := ParsePomXML(reader) if err != nil { return nil, fmt.Errorf("unable to parse pom from Maven repository url %v: %w", requestURL, err) } @@ -388,7 +445,7 @@ func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, // this function is guaranteed to return an unread reader for the correct contents. 
// NOTE: this could be promoted to the internal cache package as a specialized version of the cache.Resolver // if there are more users of this functionality -func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCloser, error)) (io.Reader, error) { +func (r *Resolver) cacheResolveReader(key string, resolve func() (io.ReadCloser, error)) (io.Reader, error) { reader, err := r.cache.Read(key) if err == nil && reader != nil { return reader, err @@ -410,7 +467,7 @@ func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCl } // resolveParent attempts to resolve the parent for the given pom -func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, resolvingProperties ...string) (*gopom.Project, error) { +func (r *Resolver) resolveParent(ctx context.Context, pom *Project, resolvingProperties ...string) (*Project, error) { if pom == nil || pom.Parent == nil { return nil, nil } @@ -422,7 +479,7 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, r version := r.resolvePropertyValue(ctx, parent.Version, resolvingProperties, &pomWithoutParent) // check cache before resolving - parentID := mavenID{groupID, artifactID, version} + parentID := ID{groupID, artifactID, version} if resolvedParent, ok := r.resolved[parentID]; ok { return resolvedParent, nil } @@ -434,21 +491,21 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, r } // find POM normally - return r.findPom(ctx, groupID, artifactID, version) + return r.FindPom(ctx, groupID, artifactID, version) } -// findInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies +// resolveInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies // //nolint:gocognit,funlen -func (r *mavenResolver) findInheritedVersion(ctx 
context.Context, pom *gopom.Project, groupID, artifactID string, resolutionContext ...*gopom.Project) (string, error) { +func (r *Resolver) resolveInheritedVersion(ctx context.Context, pom *Project, groupID, artifactID string, resolutionContext ...*Project) (string, error) { if pom == nil { return "", fmt.Errorf("nil pom provided to findInheritedVersion") } if r.cfg.MaxParentRecursiveDepth > 0 && len(resolutionContext) > r.cfg.MaxParentRecursiveDepth { - return "", fmt.Errorf("maximum depth reached attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.getMavenID(ctx, pom)) + return "", fmt.Errorf("maximum depth reached attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.ResolveID(ctx, pom)) } if slices.Contains(resolutionContext, pom) { - return "", fmt.Errorf("cycle detected attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.getMavenID(ctx, pom)) + return "", fmt.Errorf("cycle detected attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.ResolveID(ctx, pom)) } resolutionContext = append(resolutionContext, pom) @@ -457,10 +514,10 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro // check for entries in dependencyManagement first for _, dep := range pomManagedDependencies(pom) { - depGroupID := r.getPropertyValue(ctx, dep.GroupID, resolutionContext...) - depArtifactID := r.getPropertyValue(ctx, dep.ArtifactID, resolutionContext...) + depGroupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, resolutionContext...) + depArtifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, resolutionContext...) if depGroupID == groupID && depArtifactID == artifactID { - version = r.getPropertyValue(ctx, dep.Version, resolutionContext...) + version = r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) 
if version != "" { return version, nil } @@ -468,17 +525,17 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro // imported pom files should be treated just like parent poms, they are used to define versions of dependencies if deref(dep.Type) == "pom" && deref(dep.Scope) == "import" { - depVersion := r.getPropertyValue(ctx, dep.Version, resolutionContext...) + depVersion := r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) - depPom, err := r.findPom(ctx, depGroupID, depArtifactID, depVersion) + depPom, err := r.FindPom(ctx, depGroupID, depArtifactID, depVersion) if err != nil || depPom == nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", mavenID{depGroupID, depArtifactID, depVersion}). + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", ID{depGroupID, depArtifactID, depVersion}). Debug("unable to find imported pom looking for managed dependencies") continue } - version, err = r.findInheritedVersion(ctx, depPom, groupID, artifactID, resolutionContext...) + version, err = r.resolveInheritedVersion(ctx, depPom, groupID, artifactID, resolutionContext...) if err != nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", mavenID{depGroupID, depArtifactID, depVersion}). + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", ID{depGroupID, depArtifactID, depVersion}). Debug("error during findInheritedVersion") } if version != "" { @@ -493,7 +550,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro return "", err } if parent != nil { - version, err = r.findInheritedVersion(ctx, parent, groupID, artifactID, resolutionContext...) + version, err = r.resolveInheritedVersion(ctx, parent, groupID, artifactID, resolutionContext...) 
if err != nil { return "", err } @@ -503,11 +560,11 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro } // check for inherited dependencies - for _, dep := range pomDependencies(pom) { - depGroupID := r.getPropertyValue(ctx, dep.GroupID, resolutionContext...) - depArtifactID := r.getPropertyValue(ctx, dep.ArtifactID, resolutionContext...) + for _, dep := range DirectPomDependencies(pom) { + depGroupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, resolutionContext...) + depArtifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, resolutionContext...) if depGroupID == groupID && depArtifactID == artifactID { - version = r.getPropertyValue(ctx, dep.Version, resolutionContext...) + version = r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) if version != "" { return version, nil } @@ -517,18 +574,24 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro return "", nil } -// findLicenses search pom for license, traversing parent poms if needed -func (r *mavenResolver) findLicenses(ctx context.Context, groupID, artifactID, version string) ([]gopom.License, error) { - pom, err := r.findPom(ctx, groupID, artifactID, version) +// FindLicenses attempts to find a pom, and once found attempts to resolve licenses traversing +// parent poms as necessary +func (r *Resolver) FindLicenses(ctx context.Context, groupID, artifactID, version string) ([]gopom.License, error) { + pom, err := r.FindPom(ctx, groupID, artifactID, version) if pom == nil || err != nil { return nil, err } return r.resolveLicenses(ctx, pom) } +// ResolveLicenses searches the pom for license, resolving and traversing parent poms if needed +func (r *Resolver) ResolveLicenses(ctx context.Context, pom *Project) ([]License, error) { + return r.resolveLicenses(ctx, pom) +} + // resolveLicenses searches the pom for license, traversing parent poms if needed -func (r *mavenResolver) resolveLicenses(ctx context.Context, pom 
*gopom.Project, processing ...mavenID) ([]gopom.License, error) { - id := r.getMavenID(ctx, pom) +func (r *Resolver) resolveLicenses(ctx context.Context, pom *Project, processing ...ID) ([]License, error) { + id := r.ResolveID(ctx, pom) if slices.Contains(processing, id) { return nil, fmt.Errorf("cycle detected resolving licenses for: %v", id) } @@ -552,12 +615,12 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project, } // pomLicenses appends the directly specified licenses with non-empty name or url -func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []gopom.License { - var out []gopom.License +func (r *Resolver) pomLicenses(ctx context.Context, pom *Project) []License { + var out []License for _, license := range deref(pom.Licenses) { // if we find non-empty licenses, return them - name := r.getPropertyValue(ctx, license.Name, pom) - url := r.getPropertyValue(ctx, license.URL, pom) + name := r.resolvePropertyValue(ctx, license.Name, nil, pom) + url := r.resolvePropertyValue(ctx, license.URL, nil, pom) if name != "" || url != "" { out = append(out, license) } @@ -565,8 +628,8 @@ func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []g return out } -func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *gopom.Project, parentID mavenID, resolvingProperties []string) *gopom.Project { - // don't resolve if no resolver +func (r *Resolver) findParentPomByRelativePath(ctx context.Context, pom *Project, parentID ID, resolvingProperties []string) *Project { + // can't resolve without a file resolver if r.fileResolver == nil { return nil } @@ -588,7 +651,7 @@ func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *go } parentLocations, err := r.fileResolver.FilesByPath(p) if err != nil || len(parentLocations) == 0 { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "relativePath", 
relativePath). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "relativePath", relativePath). Trace("parent pom not found by relative path") return nil } @@ -596,34 +659,49 @@ func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *go parentContents, err := r.fileResolver.FileContentsByLocation(parentLocation) if err != nil || parentContents == nil { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). Debug("unable to get contents of parent pom by relative path") return nil } defer internal.CloseAndLogError(parentContents, parentLocation.RealPath) - parentPom, err := decodePomXML(parentContents) + parentPom, err := ParsePomXML(parentContents) if err != nil || parentPom == nil { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). Debug("unable to parse parent pom") return nil } // ensure parent matches - newParentID := r.resolveMavenID(ctx, resolvingProperties, parentPom) + newParentID := r.resolveID(ctx, resolvingProperties, parentPom) if newParentID.ArtifactID != parentID.ArtifactID { - log.WithFields("newParentID", newParentID, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("newParentID", newParentID, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). 
Debug("parent IDs do not match resolving parent by relative path") return nil } r.resolved[parentID] = parentPom - r.pomLocations[parentPom] = parentLocation // for any future parent relativepath lookups + r.pomLocations[parentPom] = parentLocation // for any future parent relativePath lookups return parentPom } -// pomDependencies returns all dependencies directly defined in a project, including all defined in profiles. -// does not resolve parent dependencies -func pomDependencies(pom *gopom.Project) []gopom.Dependency { +// AddPom allows for adding known pom files with locations within the file resolver, these locations may be used +// while resolving parent poms by relative path +func (r *Resolver) AddPom(ctx context.Context, pom *Project, location file.Location) { + r.pomLocations[pom] = location + // by calling resolve ID here, this will lookup necessary parent poms by relative path, and + // track any poms we found with complete version information if enough is available to resolve + id := r.ResolveID(ctx, pom) + if id.Valid() { + _, existing := r.resolved[id] + if !existing { + r.resolved[id] = pom + } + } +} + +// DirectPomDependencies returns all dependencies directly defined in a project, including all defined in profiles. +// This does not resolve any parent or transitive dependencies +func DirectPomDependencies(pom *Project) []Dependency { dependencies := deref(pom.Dependencies) for _, profile := range deref(pom.Profiles) { dependencies = append(dependencies, deref(profile.Dependencies)...) @@ -633,8 +711,8 @@ func pomDependencies(pom *gopom.Project) []gopom.Dependency { // pomManagedDependencies returns all directly defined managed dependencies in a project pom, including all defined in profiles. 
// does not resolve parent managed dependencies -func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency { - var dependencies []gopom.Dependency +func pomManagedDependencies(pom *Project) []Dependency { + var dependencies []Dependency if pom.DependencyManagement != nil { dependencies = append(dependencies, deref(pom.DependencyManagement.Dependencies)...) } @@ -645,3 +723,12 @@ func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency { } return dependencies } + +// deref dereferences ptr if not nil, or returns the type default value if ptr is nil +func deref[T any](ptr *T) T { + if ptr == nil { + var t T + return t + } + return *ptr +} diff --git a/syft/pkg/cataloger/java/maven_resolver_test.go b/syft/pkg/cataloger/java/internal/maven/resolver_test.go similarity index 59% rename from syft/pkg/cataloger/java/maven_resolver_test.go rename to syft/pkg/cataloger/java/internal/maven/resolver_test.go index bec9b6691..f8c7e3d65 100644 --- a/syft/pkg/cataloger/java/maven_resolver_test.go +++ b/syft/pkg/cataloger/java/internal/maven/resolver_test.go @@ -1,34 +1,30 @@ -package java +package maven import ( "context" - "io" - "net/http" - "net/http/httptest" - "os" "path/filepath" + "strings" "testing" - "github.com/bmatcuk/doublestar/v4" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/internal/fileresolver" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" ) func Test_resolveProperty(t *testing.T) { tests := []struct { name string property string - pom gopom.Project + pom Project expected string }{ { name: "property", property: "${version.number}", - pom: gopom.Project{ - Properties: &gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "version.number": "12.5.0", }, @@ -39,7 +35,7 @@ func Test_resolveProperty(t *testing.T) { { name: "groupId", property: "${project.groupId}", - pom: gopom.Project{ + pom: 
Project{ GroupID: ptr("org.some.group"), }, expected: "org.some.group", @@ -47,8 +43,8 @@ func Test_resolveProperty(t *testing.T) { { name: "parent groupId", property: "${project.parent.groupId}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ GroupID: ptr("org.some.parent"), }, }, @@ -57,7 +53,7 @@ func Test_resolveProperty(t *testing.T) { { name: "nil pointer halts search", property: "${project.parent.groupId}", - pom: gopom.Project{ + pom: Project{ Parent: nil, }, expected: "", @@ -65,8 +61,8 @@ func Test_resolveProperty(t *testing.T) { { name: "nil string pointer halts search", property: "${project.parent.groupId}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ GroupID: nil, }, }, @@ -75,11 +71,11 @@ func Test_resolveProperty(t *testing.T) { { name: "double dereference", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("1.2.3"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${project.parent.version}", }, @@ -90,8 +86,8 @@ func Test_resolveProperty(t *testing.T) { { name: "map missing stops double dereference", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("1.2.3"), }, }, @@ -100,11 +96,11 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if it resolves to a variable", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("${undefined.version}"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${project.parent.version}", }, @@ -115,8 +111,8 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic", property: "${springboot.version}", - pom: gopom.Project{ - Properties: 
&gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${springboot.version}", }, @@ -127,8 +123,8 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic more steps", property: "${cyclic.version}", - pom: gopom.Project{ - Properties: &gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "other.version": "${cyclic.version}", "springboot.version": "${other.version}", @@ -141,11 +137,11 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic involving parent", property: "${cyclic.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("${cyclic.version}"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "other.version": "${parent.version}", "springboot.version": "${other.version}", @@ -159,15 +155,15 @@ func Test_resolveProperty(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) - resolved := r.getPropertyValue(context.Background(), ptr(test.property), &test.pom) + r := NewResolver(nil, DefaultConfig()) + resolved := r.ResolveProperty(context.Background(), &test.pom, ptr(test.property)) require.Equal(t, test.expected, resolved) }) } } func Test_mavenResolverLocal(t *testing.T) { - dir, err := filepath.Abs("test-fixtures/pom/maven-repo") + dir, err := filepath.Abs("test-fixtures/maven-repo") require.NoError(t, err) tests := []struct { @@ -211,26 +207,26 @@ func Test_mavenResolverLocal(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(nil, ArchiveCatalogerConfig{ + r := NewResolver(nil, Config{ UseNetwork: false, - UseMavenLocalRepository: true, - MavenLocalRepositoryDir: dir, + UseLocalRepository: true, + LocalRepositoryDir: dir, 
MaxParentRecursiveDepth: test.maxDepth, }) - pom, err := r.findPom(ctx, test.groupID, test.artifactID, test.version) + pom, err := r.FindPom(ctx, test.groupID, test.artifactID, test.version) if test.wantErr != nil { test.wantErr(t, err) } else { require.NoError(t, err) } - got := r.getPropertyValue(context.Background(), &test.expression, pom) + got := r.ResolveProperty(context.Background(), pom, &test.expression) require.Equal(t, test.expected, got) }) } } func Test_mavenResolverRemote(t *testing.T) { - url := mockMavenRepo(t) + url := maventest.MockRepo(t, "test-fixtures/maven-repo") tests := []struct { groupID string @@ -252,25 +248,25 @@ func Test_mavenResolverRemote(t *testing.T) { for _, test := range tests { t.Run(test.artifactID, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(nil, ArchiveCatalogerConfig{ - UseNetwork: true, - UseMavenLocalRepository: false, - MavenBaseURL: url, + r := NewResolver(nil, Config{ + UseNetwork: true, + UseLocalRepository: false, + Repositories: strings.Split(url, ","), }) - pom, err := r.findPom(ctx, test.groupID, test.artifactID, test.version) + pom, err := r.FindPom(ctx, test.groupID, test.artifactID, test.version) if test.wantErr != nil { test.wantErr(t, err) } else { require.NoError(t, err) } - got := r.getPropertyValue(context.Background(), &test.expression, pom) + got := r.ResolveProperty(context.Background(), pom, &test.expression) require.Equal(t, test.expected, got) }) } } func Test_relativePathParent(t *testing.T) { - resolver, err := fileresolver.NewFromDirectory("test-fixtures/pom/local", "") + resolver, err := fileresolver.NewFromDirectory("test-fixtures/local", "") require.NoError(t, err) ctx := context.Background() @@ -278,12 +274,12 @@ func Test_relativePathParent(t *testing.T) { tests := []struct { name string pom string - validate func(t *testing.T, r *mavenResolver, pom *gopom.Project) + validate func(t *testing.T, r *Resolver, pom *Project) }{ { name: "basic", pom: "child-1/pom.xml", 
- validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { + validate: func(t *testing.T, r *Resolver, pom *Project) { parent, err := r.resolveParent(ctx, pom) require.NoError(t, err) require.Contains(t, r.pomLocations, parent) @@ -292,16 +288,15 @@ func Test_relativePathParent(t *testing.T) { require.NoError(t, err) require.Contains(t, r.pomLocations, parent) - got := r.getPropertyValue(ctx, ptr("${commons-exec_subversion}"), pom) + got := r.ResolveProperty(ctx, pom, ptr("${commons-exec_subversion}")) require.Equal(t, "3", got) - }, }, { name: "parent property", pom: "child-2/pom.xml", - validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { - id := r.getMavenID(ctx, pom) + validate: func(t *testing.T, r *Resolver, pom *Project) { + id := r.ResolveID(ctx, pom) // child.parent.version = ${revision} // parent.revision = 3.3.3 require.Equal(t, id.Version, "3.3.3") @@ -310,9 +305,9 @@ func Test_relativePathParent(t *testing.T) { { name: "invalid parent", pom: "child-3/pom.xml", - validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { + validate: func(t *testing.T, r *Resolver, pom *Project) { require.NotNil(t, pom) - id := r.getMavenID(ctx, pom) + id := r.ResolveID(ctx, pom) // version should not be resolved to anything require.Equal(t, "", id.Version) }, @@ -321,7 +316,7 @@ func Test_relativePathParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(resolver, DefaultArchiveCatalogerConfig()) + r := NewResolver(resolver, DefaultConfig()) locs, err := resolver.FilesByPath(test.pom) require.NoError(t, err) require.Len(t, locs, 1) @@ -331,7 +326,7 @@ func Test_relativePathParent(t *testing.T) { require.NoError(t, err) defer internal.CloseAndLogError(contents, loc.RealPath) - pom, err := decodePomXML(contents) + pom, err := ParsePomXML(contents) require.NoError(t, err) r.pomLocations[pom] = loc @@ -341,59 +336,7 @@ func Test_relativePathParent(t *testing.T) { } } -// 
mockMavenRepo starts a remote maven repo serving all the pom files found in test-fixtures/pom/maven-repo -func mockMavenRepo(t *testing.T) (url string) { - t.Helper() - - return mockMavenRepoAt(t, "test-fixtures/pom/maven-repo") -} - -// mockMavenRepoAt starts a remote maven repo serving all the pom files found in the given directory -func mockMavenRepoAt(t *testing.T, dir string) (url string) { - t.Helper() - - // mux is the HTTP request multiplexer used with the test server. - mux := http.NewServeMux() - - // We want to ensure that tests catch mistakes where the endpoint URL is - // specified as absolute rather than relative. It only makes a difference - // when there's a non-empty base URL path. So, use that. See issue #752. - apiHandler := http.NewServeMux() - apiHandler.Handle("/", mux) - // server is a test HTTP server used to provide mock API responses. - server := httptest.NewServer(apiHandler) - - t.Cleanup(server.Close) - - matches, err := doublestar.Glob(os.DirFS(dir), filepath.Join("**", "*.pom")) - require.NoError(t, err) - - for _, match := range matches { - fullPath, err := filepath.Abs(filepath.Join(dir, match)) - require.NoError(t, err) - match = "/" + filepath.ToSlash(match) - mux.HandleFunc(match, mockMavenHandler(fullPath)) - } - - return server.URL -} - -func mockMavenHandler(responseFixture string) func(w http.ResponseWriter, r *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - // Set the Content-Type header to indicate that the response is XML - w.Header().Set("Content-Type", "application/xml") - // Copy the file's content to the response writer - f, err := os.Open(responseFixture) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer internal.CloseAndLogError(f, responseFixture) - _, err = io.Copy(w, f) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - } +// ptr returns a pointer to the given value 
+func ptr[T any](value T) *T { + return &value } diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64 b/syft/pkg/cataloger/java/internal/maven/test-fixtures/declared-iso-8859-encoded-pom.xml.base64 similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64 rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/declared-iso-8859-encoded-pom.xml.base64 diff --git a/syft/pkg/cataloger/java/test-fixtures/local-repository-settings/.m2/settings.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local-repository-settings/.m2/settings.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/local-repository-settings/.m2/settings.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local-repository-settings/.m2/settings.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-1/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-2/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-2/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-2/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-2/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-3/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-3/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-3/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-3/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/contains-child-1/pom.xml 
b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/contains-child-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/contains-child-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/contains-child-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-1/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-2/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-2/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-2/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-2/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-3/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-3/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-3/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-3/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom 
similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom similarity index 100% rename from 
syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64 b/syft/pkg/cataloger/java/internal/maven/test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64 similarity index 100% rename from 
syft/pkg/cataloger/java/test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64 rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64 diff --git a/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go b/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go new file mode 100644 index 000000000..6ab462f6b --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go @@ -0,0 +1,65 @@ +package maventest + +import ( + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/bmatcuk/doublestar/v4" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal" +) + +// MockRepo starts a remote maven repo serving all the pom files found in a maven-structured directory +func MockRepo(t *testing.T, dir string) (url string) { + t.Helper() + + // mux is the HTTP request multiplexer used with the test server. + mux := http.NewServeMux() + + // We want to ensure that tests catch mistakes where the endpoint URL is + // specified as absolute rather than relative. It only makes a difference + // when there's a non-empty base URL path. So, use that. See issue #752. + apiHandler := http.NewServeMux() + apiHandler.Handle("/", mux) + // server is a test HTTP server used to provide mock API responses. 
+ server := httptest.NewServer(apiHandler) + + t.Cleanup(server.Close) + + matches, err := doublestar.Glob(os.DirFS(dir), filepath.Join("**", "*.pom")) + require.NoError(t, err) + + for _, match := range matches { + fullPath, err := filepath.Abs(filepath.Join(dir, match)) + require.NoError(t, err) + match = "/" + filepath.ToSlash(match) + mux.HandleFunc(match, mockMavenHandler(fullPath)) + } + + return server.URL +} + +func mockMavenHandler(responseFixture string) http.HandlerFunc { + return func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + // Set the Content-Type header to indicate that the response is XML + w.Header().Set("Content-Type", "application/xml") + // Copy the file's content to the response writer + f, err := os.Open(responseFixture) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer internal.CloseAndLogError(f, responseFixture) + _, err = io.Copy(w, f) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } +} diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index 9a5f391d0..dc4922774 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -1,24 +1,17 @@ package java import ( - "bytes" "context" - "encoding/xml" "errors" - "fmt" - "io" "strings" - "github.com/saintfish/chardet" - "github.com/vifraa/gopom" - "golang.org/x/net/html/charset" - "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/unknown" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" ) const ( @@ -40,10 +33,11 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver return nil, nil, err } - r := newMavenResolver(fileResolver, p.cfg) + r := 
maven.NewResolver(fileResolver, p.cfg.mavenConfig()) var errs error - var poms []*gopom.Project + var poms []*maven.Project + pomLocations := map[*maven.Project]file.Location{} for _, pomLocation := range locations { pom, err := readPomFromLocation(fileResolver, pomLocation) if err != nil || pom == nil { @@ -53,59 +47,172 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver } poms = append(poms, pom) - - // store information about this pom for future lookups - r.pomLocations[pom] = pomLocation - r.resolved[r.getMavenID(ctx, pom)] = pom + pomLocations[pom] = pomLocation + r.AddPom(ctx, pom, pomLocation) } var pkgs []pkg.Package + var relationships []artifact.Relationship + resolved := map[maven.ID]*pkg.Package{} + + // catalog all the main packages first so these can be referenced later when building the dependency graph for _, pom := range poms { - pkgs = append(pkgs, processPomXML(ctx, r, pom, r.pomLocations[pom])...) + location := pomLocations[pom] // should always exist + + id := r.ResolveID(ctx, pom) + mainPkg := newPackageFromMavenPom(ctx, r, pom, location) + if mainPkg == nil { + continue + } + resolved[id] = mainPkg + pkgs = append(pkgs, *mainPkg) } - return pkgs, nil, errs + + // catalog all dependencies + for _, pom := range poms { + location := pomLocations[pom] // should always exist + + id := r.ResolveID(ctx, pom) + mainPkg := resolved[id] + + newPkgs, newRelationships, newErrs := collectDependencies(ctx, r, resolved, mainPkg, pom, location, p.cfg.ResolveTransitiveDependencies) + pkgs = append(pkgs, newPkgs...) + relationships = append(relationships, newRelationships...) 
+ errs = unknown.Join(errs, newErrs) + } + + return pkgs, relationships, errs } -func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*gopom.Project, error) { +func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*maven.Project, error) { contents, err := fileResolver.FileContentsByLocation(pomLocation) if err != nil { return nil, err } defer internal.CloseAndLogError(contents, pomLocation.RealPath) - return decodePomXML(contents) + return maven.ParsePomXML(contents) } -func processPomXML(ctx context.Context, r *mavenResolver, pom *gopom.Project, loc file.Location) []pkg.Package { - var pkgs []pkg.Package - - pomID := r.getMavenID(ctx, pom) - for _, dep := range pomDependencies(pom) { - depID := r.resolveDependencyID(ctx, pom, dep) - log.WithFields("pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Trace("adding maven pom dependency") - - p, err := newPackageFromDependency( - ctx, - r, - pom, - dep, - loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), - ) - if err != nil { - log.WithFields("error", err, "pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Debugf("error adding dependency") - } - if p == nil { - continue - } - pkgs = append(pkgs, *p) +// newPackageFromMavenPom processes a single Maven POM for a given parent package, returning only the main package from the pom +func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.Project, location file.Location) *pkg.Package { + id := r.ResolveID(ctx, pom) + parent, err := r.ResolveParent(ctx, pom) + if err != nil { + // this is expected in many cases, there will be no network access and the maven resolver is unable to + // look up information, so we can continue with what little information we have + log.Trace("unable to resolve parent due to: %v", err) } - return pkgs + var javaPomParent *pkg.JavaPomParent + if parent != nil { // parent is returned in both cases: when it is resolved or 
synthesized from the pom.parent info + parentID := r.ResolveID(ctx, parent) + javaPomParent = &pkg.JavaPomParent{ + GroupID: parentID.GroupID, + ArtifactID: parentID.ArtifactID, + Version: parentID.Version, + } + } + + pomLicenses, err := r.ResolveLicenses(ctx, pom) + if err != nil { + log.Tracef("error resolving licenses: %v", err) + } + licenses := toPkgLicenses(&location, pomLicenses) + + m := pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + Parent: javaPomParent, + GroupID: id.GroupID, + ArtifactID: id.ArtifactID, + Version: id.Version, + Name: r.ResolveProperty(ctx, pom, pom.Name), + Description: r.ResolveProperty(ctx, pom, pom.Description), + URL: r.ResolveProperty(ctx, pom, pom.URL), + }, + } + + p := &pkg.Package{ + Name: id.ArtifactID, + Version: id.Version, + Locations: file.NewLocationSet( + location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ), + Licenses: pkg.NewLicenseSet(licenses...), + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + PURL: packageURL(id.ArtifactID, id.Version, m), + Metadata: m, + } + + finalizePackage(p) + + return p } -func newPomProject(ctx context.Context, r *mavenResolver, path string, pom *gopom.Project) *pkg.JavaPomProject { - id := r.getMavenID(ctx, pom) - name := r.getPropertyValue(ctx, pom.Name, pom) - projectURL := r.getPropertyValue(ctx, pom.URL, pom) +func collectDependencies(ctx context.Context, r *maven.Resolver, resolved map[maven.ID]*pkg.Package, parentPkg *pkg.Package, pom *maven.Project, loc file.Location, includeTransitiveDependencies bool) ([]pkg.Package, []artifact.Relationship, error) { + var errs error + var pkgs []pkg.Package + var relationships []artifact.Relationship + + pomID := r.ResolveID(ctx, pom) + for _, dep := range maven.DirectPomDependencies(pom) { + depID := r.ResolveDependencyID(ctx, pom, dep) + log.WithFields("pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Trace("adding maven pom dependency") + + // we may have a 
reference to a package pointing to an existing pom on the filesystem, but we don't want to duplicate these entries + depPkg := resolved[depID] + if depPkg == nil { + p, err := newPackageFromDependency( + ctx, + r, + pom, + dep, + loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + if err != nil { + log.WithFields("error", err, "pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Debugf("error adding dependency") + } + + if p == nil { + // we don't have a valid package, just continue to the next dependency + continue + } + depPkg = p + resolved[depID] = depPkg + + // only resolve transitive dependencies if we're not already looking these up for the specific package + if includeTransitiveDependencies && depID.Valid() { + depPom, err := r.FindPom(ctx, depID.GroupID, depID.ArtifactID, depID.Version) + if err != nil { + log.WithFields("mavenID", depID, "error", err).Debug("error finding pom") + } + if depPom != nil { + transitivePkgs, transitiveRelationships, transitiveErrs := collectDependencies(ctx, r, resolved, depPkg, depPom, loc, includeTransitiveDependencies) + pkgs = append(pkgs, transitivePkgs...) + relationships = append(relationships, transitiveRelationships...) 
+					errs = unknown.Join(errs, transitiveErrs)
+				}
+			}
+		}
+
+		pkgs = append(pkgs, *depPkg)
+		if parentPkg != nil {
+			relationships = append(relationships, artifact.Relationship{
+				From: *depPkg,
+				To:   *parentPkg,
+				Type: artifact.DependencyOfRelationship,
+			})
+		}
+	}
+
+	return pkgs, relationships, errs
+}
+
+func newPomProject(ctx context.Context, r *maven.Resolver, path string, pom *maven.Project) *pkg.JavaPomProject {
+	id := r.ResolveID(ctx, pom)
+	name := r.ResolveProperty(ctx, pom, pom.Name)
+	projectURL := r.ResolveProperty(ctx, pom, pom.URL)
 
 	log.WithFields("path", path, "artifactID", id.ArtifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml")
 	return &pkg.JavaPomProject{
@@ -115,34 +222,45 @@ func newPomProject(ctx context.Context, r *mavenResolver, path string, pom *gopo
 		ArtifactID:  id.ArtifactID,
 		Version:     id.Version,
 		Name:        name,
-		Description: cleanDescription(r.getPropertyValue(ctx, pom.Description, pom)),
+		Description: cleanDescription(r.ResolveProperty(ctx, pom, pom.Description)),
 		URL:         projectURL,
 	}
 }
 
-func newPackageFromDependency(ctx context.Context, r *mavenResolver, pom *gopom.Project, dep gopom.Dependency, locations ...file.Location) (*pkg.Package, error) {
-	id := r.resolveDependencyID(ctx, pom, dep)
+// newPackageFromDependency builds the package for dep as declared in pom; if the dependency's own POM is found, its licenses and project metadata are attached. Any FindPom error is returned together with the package.
+func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven.Project, dep maven.Dependency, locations ...file.Location) (*pkg.Package, error) {
+	id := r.ResolveDependencyID(ctx, pom, dep)
+
+	var err error
+	var licenses []pkg.License
+	dependencyPom, depErr := r.FindPom(ctx, id.GroupID, id.ArtifactID, id.Version)
+	if depErr != nil {
+		err = errors.Join(err, depErr)
+	}
+
+	var pomProject *pkg.JavaPomProject
+	if dependencyPom != nil {
+		depLicenses, _ := r.ResolveLicenses(ctx, dependencyPom)
+		licenses = append(licenses, toPkgLicenses(nil, depLicenses)...)
+		// describe the dependency from its own pom; the declaring pom's name/description/url belong to a different project
+		pomProject = &pkg.JavaPomProject{
+			Parent:      pomParent(ctx, r, dependencyPom),
+			GroupID:     id.GroupID,
+			ArtifactID:  id.ArtifactID,
+			Version:     id.Version,
+			Name:        r.ResolveProperty(ctx, dependencyPom, dependencyPom.Name),
+			Description: r.ResolveProperty(ctx, dependencyPom, dependencyPom.Description),
+			URL:         r.ResolveProperty(ctx, dependencyPom, dependencyPom.URL),
+		}
+	}
 
 	m := pkg.JavaArchive{
 		PomProperties: &pkg.JavaPomProperties{
 			GroupID:    id.GroupID,
 			ArtifactID: id.ArtifactID,
-			Scope:      r.getPropertyValue(ctx, dep.Scope, pom),
+			Scope:      r.ResolveProperty(ctx, pom, dep.Scope),
 		},
-	}
-
-	var err error
-	var licenses []pkg.License
-	dependencyPom, depErr := r.findPom(ctx, id.GroupID, id.ArtifactID, id.Version)
-	if depErr != nil {
-		err = errors.Join(err, depErr)
-	}
-
-	if dependencyPom != nil {
-		depLicenses, _ := r.resolveLicenses(ctx, dependencyPom)
-		for _, license := range depLicenses {
-			licenses = append(licenses, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), nil))
-		}
+		PomProject: pomProject,
 	}
 
 	p := &pkg.Package{
@@ -157,65 +275,19 @@ func newPackageFromDependency(ctx context.Context, r *mavenResolver, pom *gopom.
 		Metadata: m,
 	}
 
-	p.SetID()
+	finalizePackage(p)
 
 	return p, err
 }
 
-// decodePomXML decodes a pom XML file, detecting and converting non-UTF-8 charsets. this DOES NOT perform any logic to resolve properties such as groupID, artifactID, and version
-func decodePomXML(content io.Reader) (project *gopom.Project, err error) {
-	inputReader, err := getUtf8Reader(content)
-	if err != nil {
-		return nil, fmt.Errorf("unable to read pom.xml: %w", err)
-	}
-
-	decoder := xml.NewDecoder(inputReader)
-	// when an xml file has a character set declaration (e.g.
'') read that and use the correct decoder - decoder.CharsetReader = charset.NewReaderLabel - - project = &gopom.Project{} - if err := decoder.Decode(project); err != nil { - return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err) - } - - return project, nil -} - -func getUtf8Reader(content io.Reader) (io.Reader, error) { - pomContents, err := io.ReadAll(content) - if err != nil { - return nil, err - } - - detector := chardet.NewTextDetector() - detection, err := detector.DetectBest(pomContents) - - var inputReader io.Reader - if err == nil && detection != nil { - if detection.Charset == "UTF-8" { - inputReader = bytes.NewReader(pomContents) - } else { - inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) - if err != nil { - return nil, fmt.Errorf("unable to get encoding: %w", err) - } - } - } else { - // we could not detect the encoding, but we want a valid file to read. Replace unreadable - // characters with the UTF-8 replacement character. 
- inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) - } - return inputReader, nil -} - -func pomParent(ctx context.Context, r *mavenResolver, pom *gopom.Project) *pkg.JavaPomParent { +func pomParent(ctx context.Context, r *maven.Resolver, pom *maven.Project) *pkg.JavaPomParent { if pom == nil || pom.Parent == nil { return nil } - groupID := r.getPropertyValue(ctx, pom.Parent.GroupID, pom) - artifactID := r.getPropertyValue(ctx, pom.Parent.ArtifactID, pom) - version := r.getPropertyValue(ctx, pom.Parent.Version, pom) + groupID := r.ResolveProperty(ctx, pom, pom.Parent.GroupID) + artifactID := r.ResolveProperty(ctx, pom, pom.Parent.ArtifactID) + version := r.ResolveProperty(ctx, pom, pom.Parent.Version) if groupID == "" && artifactID == "" && version == "" { return nil diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go index f80a67628..6bd0a5f23 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml_test.go +++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go @@ -2,61 +2,102 @@ package java import ( "context" - "encoding/base64" - "io" "os" - "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" "github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source/directorysource" ) func Test_parsePomXML(t *testing.T) { + pomLocation := file.NewLocationSet(file.NewLocation("pom.xml")) + + exampleJavaAppMaven := pkg.Package{ + Name: "example-java-app-maven", + Version: "0.1.0", + PURL: 
"pkg:maven/org.anchore/example-java-app-maven@0.1.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Locations: pomLocation, + Metadata: pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + GroupID: "org.anchore", + ArtifactID: "example-java-app-maven", + Version: "0.1.0", + }, + }, + } + finalizePackage(&exampleJavaAppMaven) + + jodaTime := pkg.Package{ + Name: "joda-time", + Version: "2.9.2", + PURL: "pkg:maven/com.joda/joda-time@2.9.2", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Locations: pomLocation, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "com.joda", + ArtifactID: "joda-time", + }, + }, + } + finalizePackage(&jodaTime) + + junit := pkg.Package{ + Name: "junit", + Version: "4.12", + PURL: "pkg:maven/junit/junit@4.12", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Locations: pomLocation, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "junit", + ArtifactID: "junit", + Scope: "test", + }, + }, + } + finalizePackage(&junit) + tests := []struct { - dir string - expected []pkg.Package + dir string + expected []pkg.Package + expectedRelationships []artifact.Relationship }{ { - dir: "test-fixtures/pom/local/example-java-app-maven", + dir: "test-fixtures/pom/example-java-app-maven", expected: []pkg.Package{ + exampleJavaAppMaven, + jodaTime, + junit, + }, + expectedRelationships: []artifact.Relationship{ { - Name: "joda-time", - Version: "2.9.2", - PURL: "pkg:maven/com.joda/joda-time@2.9.2", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "com.joda", - ArtifactID: "joda-time", - }, - }, + From: jodaTime, + To: exampleJavaAppMaven, + Type: artifact.DependencyOfRelationship, }, { - Name: "junit", - Version: "4.12", - PURL: "pkg:maven/junit/junit@4.12", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: 
pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "junit", - ArtifactID: "junit", - Scope: "test", - }, - }, + From: junit, + To: exampleJavaAppMaven, + Type: artifact.DependencyOfRelationship, }, }, }, @@ -75,190 +116,77 @@ func Test_parsePomXML(t *testing.T) { }, }) - pkgtest.TestCataloger(t, test.dir, cat, test.expected, nil) + pkgtest.TestCataloger(t, test.dir, cat, test.expected, test.expectedRelationships) }) } } -func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { - // regression for https://github.com/anchore/syft/issues/2044 - - // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the - // file, which is extremely important for this test. - - // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically - // convert the file to UTF-8, which will break this test: - - // xxd with the original pom.xml - // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. - // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 J.r.me Mi - // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc.. - - // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) - // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. - // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d J...r...m - // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc.. - - // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save - // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. - // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. - // This is quite silly on the part of IntelliJ, but it is what it is. 
- - cases := []struct { - name string - fixture string - }{ - { - name: "undeclared encoding", - fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64", - }, - { - name: "declared encoding", - fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64", - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - fh, err := os.Open(c.fixture) - require.NoError(t, err) - - decoder := base64.NewDecoder(base64.StdEncoding, fh) - - proj, err := decodePomXML(decoder) - - require.NoError(t, err) - require.NotEmpty(t, proj.Developers) - }) - } - -} - func Test_parseCommonsTextPomXMLProject(t *testing.T) { + mavenLocalRepoDir := "internal/maven/test-fixtures/maven-repo" + mavenBaseURL := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") + tests := []struct { + name string dir string - expected []pkg.Package + config ArchiveCatalogerConfig + expected expected }{ { - dir: "test-fixtures/pom/local/commons-text-1.10.0", - - expected: getCommonsTextExpectedPackages(), - }, - } - - for _, test := range tests { - t.Run(test.dir, func(t *testing.T) { - for i := range test.expected { - test.expected[i].Locations.Add(file.NewLocation("pom.xml")) - } - - cat := NewPomCataloger(ArchiveCatalogerConfig{ - ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ - IncludeIndexedArchives: true, - IncludeUnindexedArchives: true, - }, + name: "no resolution", + dir: "test-fixtures/pom/commons-text-1.10.0", + config: ArchiveCatalogerConfig{ + UseNetwork: false, UseMavenLocalRepository: false, - }) - pkgtest.TestCataloger(t, test.dir, cat, test.expected, nil) - }) - } -} - -func Test_parseCommonsTextPomXMLProjectWithLocalRepository(t *testing.T) { - // Using the local repository, the version of junit-jupiter will be resolved - expectedPackages := getCommonsTextExpectedPackages() - - for i := 0; i < len(expectedPackages); i++ { - if expectedPackages[i].Name == "junit-jupiter" { - expPkg := &expectedPackages[i] - expPkg.Version = "5.9.1" - 
expPkg.PURL = "pkg:maven/org.junit.jupiter/junit-jupiter@5.9.1" - expPkg.Metadata = pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.junit.jupiter", - ArtifactID: "junit-jupiter", - Scope: "test", - }, - } - } - } - - tests := []struct { - dir string - expected []pkg.Package - }{ - { - dir: "test-fixtures/pom/local/commons-text-1.10.0", - expected: expectedPackages, + }, + expected: getCommonsTextExpectedPackages(false), }, - } - - for _, test := range tests { - t.Run(test.dir, func(t *testing.T) { - for i := range test.expected { - test.expected[i].Locations.Add(file.NewLocation("pom.xml")) - } - - cat := NewPomCataloger(ArchiveCatalogerConfig{ - ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ - IncludeIndexedArchives: true, - IncludeUnindexedArchives: true, - }, - UseMavenLocalRepository: true, - MavenLocalRepositoryDir: "test-fixtures/pom/maven-repo", - }) - pkgtest.TestCataloger(t, test.dir, cat, test.expected, nil) - }) - } -} - -func Test_parseCommonsTextPomXMLProjectWithNetwork(t *testing.T) { - url := mockMavenRepo(t) - - // Using the local repository, the version of junit-jupiter will be resolved - expectedPackages := getCommonsTextExpectedPackages() - - for i := 0; i < len(expectedPackages); i++ { - if expectedPackages[i].Name == "junit-jupiter" { - expPkg := &expectedPackages[i] - expPkg.Version = "5.9.1" - expPkg.PURL = "pkg:maven/org.junit.jupiter/junit-jupiter@5.9.1" - expPkg.Metadata = pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.junit.jupiter", - ArtifactID: "junit-jupiter", - Scope: "test", - }, - } - } - } - - tests := []struct { - dir string - expected []pkg.Package - }{ { - dir: "test-fixtures/pom/local/commons-text-1.10.0", - expected: expectedPackages, - }, - } - - for _, test := range tests { - t.Run(test.dir, func(t *testing.T) { - for i := range test.expected { - test.expected[i].Locations.Add(file.NewLocation("pom.xml")) - } - - cat := NewPomCataloger(ArchiveCatalogerConfig{ - 
ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ - IncludeIndexedArchives: true, - IncludeUnindexedArchives: true, - }, + name: "use network", + dir: "test-fixtures/pom/commons-text-1.10.0", + config: ArchiveCatalogerConfig{ UseNetwork: true, - MavenBaseURL: url, + MavenBaseURL: mavenBaseURL, UseMavenLocalRepository: false, + }, + expected: getCommonsTextExpectedPackages(true), + }, + { + name: "use local repository", + dir: "test-fixtures/pom/commons-text-1.10.0", + config: ArchiveCatalogerConfig{ + UseNetwork: false, + UseMavenLocalRepository: true, + MavenLocalRepositoryDir: mavenLocalRepoDir, + }, + expected: getCommonsTextExpectedPackages(true), + }, + { + name: "transitive dependencies", + dir: "test-fixtures/pom/transitive-top-level", + config: ArchiveCatalogerConfig{ + UseNetwork: false, + UseMavenLocalRepository: true, + MavenLocalRepositoryDir: mavenLocalRepoDir, + ResolveTransitiveDependencies: true, + }, + expected: expectedTransientPackageData(), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + cat := NewPomCataloger(ArchiveCatalogerConfig{ + ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ + IncludeIndexedArchives: true, + IncludeUnindexedArchives: true, + }, + UseNetwork: test.config.UseNetwork, + MavenBaseURL: test.config.MavenBaseURL, + UseMavenLocalRepository: test.config.UseMavenLocalRepository, + MavenLocalRepositoryDir: test.config.MavenLocalRepositoryDir, + ResolveTransitiveDependencies: test.config.ResolveTransitiveDependencies, }) - pkgtest.TestCataloger(t, test.dir, cat, test.expected, nil) + pkgtest.TestCataloger(t, test.dir, cat, test.expected.packages, test.expected.relationships) }) } } @@ -272,7 +200,7 @@ func Test_parsePomXMLProject(t *testing.T) { licenses []pkg.License }{ { - name: "go case", + name: "no license info", project: &pkg.JavaPomProject{ Path: "test-fixtures/pom/commons-codec.pom.xml", Parent: &pkg.JavaPomParent{ @@ -331,17 +259,17 @@ func Test_parsePomXMLProject(t *testing.T) { 
t.Run(test.name, func(t *testing.T) { fixture, err := os.Open(test.project.Path) assert.NoError(t, err) - r := newMavenResolver(nil, ArchiveCatalogerConfig{}) + r := maven.NewResolver(nil, maven.Config{}) - pom, err := gopom.ParseFromReader(fixture) + pom, err := maven.ParsePomXML(fixture) require.NoError(t, err) actual := newPomProject(context.Background(), r, fixture.Name(), pom) assert.NoError(t, err) assert.Equal(t, test.project, actual) - licenses := r.pomLicenses(context.Background(), pom) - assert.NoError(t, err) + licenses, err := r.ResolveLicenses(context.Background(), pom) + //assert.NoError(t, err) assert.Equal(t, test.licenses, toPkgLicenses(&jarLocation, licenses)) }) } @@ -350,12 +278,12 @@ func Test_parsePomXMLProject(t *testing.T) { func Test_pomParent(t *testing.T) { tests := []struct { name string - input *gopom.Parent + input *maven.Parent expected *pkg.JavaPomParent }{ { name: "only group ID", - input: &gopom.Parent{ + input: &maven.Parent{ GroupID: ptr("org.something"), }, expected: &pkg.JavaPomParent{ @@ -364,7 +292,7 @@ func Test_pomParent(t *testing.T) { }, { name: "only artifact ID", - input: &gopom.Parent{ + input: &maven.Parent{ ArtifactID: ptr("something"), }, expected: &pkg.JavaPomParent{ @@ -373,7 +301,7 @@ func Test_pomParent(t *testing.T) { }, { name: "only Version", - input: &gopom.Parent{ + input: &maven.Parent{ Version: ptr("something"), }, expected: &pkg.JavaPomParent{ @@ -387,12 +315,12 @@ func Test_pomParent(t *testing.T) { }, { name: "empty", - input: &gopom.Parent{}, + input: &maven.Parent{}, expected: nil, }, { name: "unused field", - input: &gopom.Parent{ + input: &maven.Parent{ RelativePath: ptr("something"), }, expected: nil, @@ -401,8 +329,8 @@ func Test_pomParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) - assert.Equal(t, test.expected, pomParent(context.Background(), r, &gopom.Project{Parent: test.input})) + r := 
maven.NewResolver(nil, maven.DefaultConfig()) + assert.Equal(t, test.expected, pomParent(context.Background(), r, &maven.Project{Parent: test.input})) }) } } @@ -431,10 +359,10 @@ func Test_cleanDescription(t *testing.T) { } func Test_resolveLicenses(t *testing.T) { - mavenURL := mockMavenRepo(t) - localM2 := "test-fixtures/pom/maven-repo" - localDir := "test-fixtures/pom/local" - containingDir := "test-fixtures/pom/local/contains-child-1" + mavenURL := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") + localM2 := "internal/maven/test-fixtures/maven-repo" + localDir := "internal/maven/test-fixtures/local" + containingDir := "internal/maven/test-fixtures/local/contains-child-1" expectedLicenses := []pkg.License{ { @@ -527,185 +455,6 @@ func Test_resolveLicenses(t *testing.T) { } } -func Test_getUtf8Reader(t *testing.T) { - tests := []struct { - name string - contents string - }{ - { - name: "unknown encoding", - // random binary contents - contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) - - got, err := getUtf8Reader(decoder) - require.NoError(t, err) - gotBytes, err := io.ReadAll(got) - require.NoError(t, err) - // if we couldn't decode the section as UTF-8, we should get a replacement character - assert.Contains(t, string(gotBytes), "�") - }) - } -} - -func getCommonsTextExpectedPackages() []pkg.Package { - return []pkg.Package{ - { - Name: "commons-lang3", - Version: "3.12.0", - PURL: "pkg:maven/org.apache.commons/commons-lang3@3.12.0", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.apache.commons", - ArtifactID: "commons-lang3", - }, - }, - }, - { - Name: "junit-jupiter", - Version: 
"", - PURL: "pkg:maven/org.junit.jupiter/junit-jupiter", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.junit.jupiter", - ArtifactID: "junit-jupiter", - Scope: "test", - }, - }, - }, - { - Name: "assertj-core", - Version: "3.23.1", - PURL: "pkg:maven/org.assertj/assertj-core@3.23.1", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.assertj", - ArtifactID: "assertj-core", - Scope: "test", - }, - }, - }, - { - Name: "commons-io", - Version: "2.11.0", - PURL: "pkg:maven/commons-io/commons-io@2.11.0", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "commons-io", - ArtifactID: "commons-io", - Scope: "test", - }, - }, - }, - { - Name: "mockito-inline", - Version: "4.8.0", - PURL: "pkg:maven/org.mockito/mockito-inline@4.8.0", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.mockito", - ArtifactID: "mockito-inline", - Scope: "test", - }, - }, - }, - { - Name: "js", - Version: "22.0.0.2", - PURL: "pkg:maven/org.graalvm.js/js@22.0.0.2", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.graalvm.js", - ArtifactID: "js", - Scope: "test", - }, - }, - }, - { - Name: "js-scriptengine", - Version: "22.0.0.2", - PURL: "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.graalvm.js", - ArtifactID: "js-scriptengine", - Scope: "test", - }, - }, - }, - { - Name: "commons-rng-simple", - Version: 
"1.4", - PURL: "pkg:maven/org.apache.commons/commons-rng-simple@1.4", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.apache.commons", - ArtifactID: "commons-rng-simple", - Scope: "test", - }, - }, - }, - { - Name: "jmh-core", - Version: "1.35", - PURL: "pkg:maven/org.openjdk.jmh/jmh-core@1.35", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.openjdk.jmh", - ArtifactID: "jmh-core", - Scope: "test", - }, - }, - }, - { - Name: "jmh-generator-annprocess", - Version: "1.35", - PURL: "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35", - Language: pkg.Java, - Type: pkg.JavaPkg, - FoundBy: pomCatalogerName, - Metadata: pkg.JavaArchive{ - PomProperties: &pkg.JavaPomProperties{ - GroupID: "org.openjdk.jmh", - ArtifactID: "jmh-generator-annprocess", - Scope: "test", - }, - }, - }, - } -} - func Test_corruptPomXml(t *testing.T) { c := NewPomCataloger(DefaultArchiveCatalogerConfig()) pkgtest.NewCatalogTester(). @@ -713,3 +462,375 @@ func Test_corruptPomXml(t *testing.T) { WithError(). 
TestCataloger(t, c) } + +type expected struct { + packages []pkg.Package + relationships []artifact.Relationship +} + +func getCommonsTextExpectedPackages(resolved bool) expected { + pomXmlLocation := file.NewLocationSet(file.NewLocation("pom.xml")) + + commonsText := pkg.Package{ + Name: "commons-text", + Version: "1.10.0", + PURL: "pkg:maven/org.apache.commons/commons-text@1.10.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + Parent: &pkg.JavaPomParent{ + GroupID: "org.apache.commons", + ArtifactID: "commons-parent", + Version: "54", + }, + GroupID: "org.apache.commons", + ArtifactID: "commons-text", + Version: "1.10.0", + Name: "Apache Commons Text", + Description: "Apache Commons Text is a library focused on algorithms working on strings.", + URL: "https://commons.apache.org/proper/commons-text", + }, + }, + } + + commonsLang3 := pkg.Package{ + Name: "commons-lang3", + Version: "3.12.0", + PURL: "pkg:maven/org.apache.commons/commons-lang3@3.12.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-lang3", + }, + }, + } + + junitJupiter := pkg.Package{ + Name: "junit-jupiter", + Version: "", + PURL: "pkg:maven/org.junit.jupiter/junit-jupiter", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.junit.jupiter", + ArtifactID: "junit-jupiter", + Scope: "test", + }, + }, + } + + assertjCore := pkg.Package{ + Name: "assertj-core", + Version: "3.23.1", + PURL: "pkg:maven/org.assertj/assertj-core@3.23.1", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.assertj", + ArtifactID: "assertj-core", + Scope: "test", + }, + }, + } 
+ + commonsIO := pkg.Package{ + Name: "commons-io", + Version: "2.11.0", + PURL: "pkg:maven/commons-io/commons-io@2.11.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "commons-io", + ArtifactID: "commons-io", + Scope: "test", + }, + }, + } + + mockitoInline := pkg.Package{ + Name: "mockito-inline", + Version: "4.8.0", + PURL: "pkg:maven/org.mockito/mockito-inline@4.8.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.mockito", + ArtifactID: "mockito-inline", + Scope: "test", + }, + }, + } + + js := pkg.Package{ + Name: "js", + Version: "22.0.0.2", + PURL: "pkg:maven/org.graalvm.js/js@22.0.0.2", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.graalvm.js", + ArtifactID: "js", + Scope: "test", + }, + }, + } + + jsScriptengine := pkg.Package{ + Name: "js-scriptengine", + Version: "22.0.0.2", + PURL: "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.graalvm.js", + ArtifactID: "js-scriptengine", + Scope: "test", + }, + }, + } + + commonsRngSimple := pkg.Package{ + Name: "commons-rng-simple", + Version: "1.4", + PURL: "pkg:maven/org.apache.commons/commons-rng-simple@1.4", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-rng-simple", + Scope: "test", + }, + }, + } + + jmhCore := pkg.Package{ + Name: "jmh-core", + Version: "1.35", + PURL: "pkg:maven/org.openjdk.jmh/jmh-core@1.35", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: 
pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.openjdk.jmh", + ArtifactID: "jmh-core", + Scope: "test", + }, + }, + } + + jmhGeneratorAnnprocess := pkg.Package{ + Name: "jmh-generator-annprocess", + Version: "1.35", + PURL: "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35", + Language: pkg.Java, + Type: pkg.JavaPkg, + FoundBy: pomCatalogerName, + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.openjdk.jmh", + ArtifactID: "jmh-generator-annprocess", + Scope: "test", + }, + }, + } + + if resolved { + junitJupiter.Version = "5.9.1" + } + + pkgs := []pkg.Package{ + commonsText, + commonsLang3, + junitJupiter, + assertjCore, + commonsIO, + mockitoInline, + js, + jsScriptengine, + commonsRngSimple, + jmhCore, + jmhGeneratorAnnprocess, + } + + var relationships []artifact.Relationship + for i := range pkgs { + p := &pkgs[i] + p.Locations = pomXmlLocation + finalizePackage(p) + if i == 0 { + continue + } + relationships = append(relationships, artifact.Relationship{ + From: *p, + To: pkgs[0], + Type: artifact.DependencyOfRelationship, + }) + } + + return expected{pkgs, relationships} +} + +func expectedTransientPackageData() expected { + epl2 := pkg.NewLicenseSet(pkg.License{ + Value: "Eclipse Public License v2.0", + Type: license.Declared, + URLs: []string{"https://www.eclipse.org/legal/epl-v20.html"}, + }) + transitiveTopLevel := pkg.Package{ + Name: "transitive-top-level", + Version: "99", + Metadata: pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + GroupID: "my.other.group", + ArtifactID: "transitive-top-level", + Version: "99", + }, + }, + } + childOne := pkg.Package{ + Name: "child-one", + Version: "1.3.6", + Licenses: epl2, + Metadata: pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + GroupID: "my.org", + ArtifactID: "child-one", + Version: "1.3.6", + Parent: &pkg.JavaPomParent{ + GroupID: "my.org", + ArtifactID: "parent-one", + Version: "3.11.0", + }, + }, 
+ PomProperties: &pkg.JavaPomProperties{ + GroupID: "my.org", + ArtifactID: "child-one", + }, + }, + } + childTwo := pkg.Package{ + Name: "child-two", + Version: "2.1.90", + Licenses: epl2, + Metadata: pkg.JavaArchive{ + PomProject: &pkg.JavaPomProject{ + GroupID: "my.org", + ArtifactID: "child-two", + Version: "2.1.90", + Parent: &pkg.JavaPomParent{ + GroupID: "my.org", + ArtifactID: "parent-one", + Version: "3.11.0", + }, + }, + PomProperties: &pkg.JavaPomProperties{ + GroupID: "my.org", + ArtifactID: "child-two", + Scope: "test", + }, + }, + } + commonsLang3_113_7_8_0 := pkg.Package{ + Name: "commons-lang3", + Version: "3.113.7.8.0", + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-lang3", + }, + }, + } + commonsLang3_12_0 := pkg.Package{ + Name: "commons-lang3", + Version: "3.12.0", + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-lang3", + }, + }, + } + commonsMath3 := pkg.Package{ + Name: "commons-math3.11.0", + Version: "3.5", + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-math3.11.0", + }, + }, + } + commonsExec := pkg.Package{ + Name: "commons-exec", + Version: "1.3", + Metadata: pkg.JavaArchive{ + PomProperties: &pkg.JavaPomProperties{ + GroupID: "org.apache.commons", + ArtifactID: "commons-exec", + }, + }, + } + + allPackages := []*pkg.Package{ + &transitiveTopLevel, + &childOne, + &childTwo, + &commonsLang3_12_0, + &commonsLang3_113_7_8_0, + &commonsMath3, + &commonsExec, + } + + for _, p := range allPackages { + p.Language = pkg.Java + p.Type = pkg.JavaPkg + p.FoundBy = pomCatalogerName + p.Locations = file.NewLocationSet(file.NewLocation("pom.xml")) + finalizePackage(p) + } + + pkgs := make([]pkg.Package, len(allPackages)) + for i := 0; i < len(allPackages); i++ { + pkgs[i] = *allPackages[i] + } + + depOf := func(a, b 
pkg.Package) artifact.Relationship { + return artifact.Relationship{ + From: a, + To: b, + Type: artifact.DependencyOfRelationship, + } + } + + return expected{ + packages: pkgs, + relationships: []artifact.Relationship{ + depOf(childTwo, transitiveTopLevel), + depOf(childOne, transitiveTopLevel), + depOf(commonsLang3_12_0, childOne), + depOf(commonsLang3_113_7_8_0, childTwo), + depOf(commonsMath3, childTwo), + depOf(commonsExec, childTwo), + }, + } +} diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/commons-text-1.10.0/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/commons-text-1.10.0/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/commons-text-1.10.0/pom.xml rename to syft/pkg/cataloger/java/test-fixtures/pom/commons-text-1.10.0/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/example-java-app-maven/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/example-java-app-maven/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/example-java-app-maven/pom.xml rename to syft/pkg/cataloger/java/test-fixtures/pom/example-java-app-maven/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/transitive-top-level/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/transitive-top-level/pom.xml new file mode 100644 index 000000000..ea5c2ab99 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/pom/transitive-top-level/pom.xml @@ -0,0 +1,34 @@ + + + 4.0.0 + + my.other.group + transitive-top-level + 99 + jar + + + + + my.org + child-one + 1.3.6 + + + + + + + my.org + child-one + + + my.org + child-two + 2.1.90 + test + + + +