From 1d14f22e4538f03a1896b2d4e1d99a65e52b6f30 Mon Sep 17 00:00:00 2001 From: Jonas Xavier Date: Wed, 22 Jun 2022 11:19:10 -0700 Subject: [PATCH] add pom.xml cataloger (#1055) Co-authored-by: Christopher Phillips --- syft/pkg/cataloger/cataloger.go | 2 + syft/pkg/cataloger/java/archive_parser.go | 2 +- syft/pkg/cataloger/java/parse_pom_xml.go | 79 +++++++++++++++---- syft/pkg/cataloger/java/parse_pom_xml_test.go | 51 +++++++++++- syft/pkg/cataloger/java/pom_cataloger.go | 17 ++++ .../cataloger/java/test-fixtures/pom/pom.xml | 59 ++++++++++++++ .../catalog_packages_cases_test.go | 30 ++++--- .../image-pkg-coverage/pkgs/java/pom.xml | 59 ++++++++++++++ 8 files changed, 270 insertions(+), 29 deletions(-) create mode 100644 syft/pkg/cataloger/java/pom_cataloger.go create mode 100644 syft/pkg/cataloger/java/test-fixtures/pom/pom.xml create mode 100644 test/integration/test-fixtures/image-pkg-coverage/pkgs/java/pom.xml diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go index 1aded4adf..c75385325 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -66,6 +66,7 @@ func DirectoryCatalogers(cfg Config) []Cataloger { deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), java.NewJavaCataloger(cfg.Java()), + java.NewJavaPomCataloger(), apkdb.NewApkdbCataloger(), golang.NewGoModuleBinaryCataloger(), golang.NewGoModFileCataloger(), @@ -88,6 +89,7 @@ func AllCatalogers(cfg Config) []Cataloger { deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), java.NewJavaCataloger(cfg.Java()), + java.NewJavaPomCataloger(), apkdb.NewApkdbCataloger(), golang.NewGoModuleBinaryCataloger(), golang.NewGoModFileCataloger(), diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index c49b591ee..b87cf5526 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -335,7 +335,7 @@ func pomProjectByParentPath(archivePath, virtualPath string, extractPaths 
[]stri projectByParentPath := make(map[string]pkg.PomProject) for filePath, fileContents := range contentsOfMavenProjectFiles { - pomProject, err := parsePomXML(filePath, strings.NewReader(fileContents)) + pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents)) if err != nil { log.Warnf("failed to parse pom.xml virtualPath=%q path=%q: %+v", virtualPath, filePath, err) continue diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index 038aff6a1..621e0fa40 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -6,34 +6,79 @@ import ( "io" "strings" + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" "github.com/vifraa/gopom" "golang.org/x/net/html/charset" ) const pomXMLGlob = "*pom.xml" +const pomXMLDirGlob = "**/pom.xml" -func parsePomXML(path string, reader io.Reader) (*pkg.PomProject, error) { - var project gopom.Project - - decoder := xml.NewDecoder(reader) - // prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil" - decoder.CharsetReader = charset.NewReaderLabel - - if err := decoder.Decode(&project); err != nil { - return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err) +func parserPomXML(path string, content io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + pom, err := decodePomXML(content) + if err != nil { + return nil, nil, err } + var pkgs []*pkg.Package + for _, dep := range pom.Dependencies { + p := newPackageFromPom(dep) + if p.Name == "" { + continue + } + + pkgs = append(pkgs, p) + } + + return pkgs, nil, nil +} + +func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) { + project, err := decodePomXML(reader) + if err != nil { + return nil, err + } + return newPomProject(path, project), nil +} + +func newPomProject(path string, p gopom.Project) *pkg.PomProject { return &pkg.PomProject{ Path: path, - Parent: 
pomParent(project.Parent), - GroupID: project.GroupID, - ArtifactID: project.ArtifactID, - Version: project.Version, - Name: project.Name, - Description: cleanDescription(project.Description), - URL: project.URL, - }, nil + Parent: pomParent(p.Parent), + GroupID: p.GroupID, + ArtifactID: p.ArtifactID, + Version: p.Version, + Name: p.Name, + Description: cleanDescription(p.Description), + URL: p.URL, + } +} + +func newPackageFromPom(dep gopom.Dependency) *pkg.Package { + p := &pkg.Package{ + Name: dep.ArtifactID, + Version: dep.Version, + Language: pkg.Java, + Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? + MetadataType: pkg.JavaMetadataType, + FoundBy: javaPomCataloger, + } + + p.Metadata = pkg.JavaMetadata{PURL: packageURL(*p)} + + return p +} + +func decodePomXML(content io.Reader) (project gopom.Project, err error) { + decoder := xml.NewDecoder(content) + // prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil" + decoder.CharsetReader = charset.NewReaderLabel + if err := decoder.Decode(&project); err != nil { + return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err) + } + + return project, nil } func pomParent(parent gopom.Parent) (result *pkg.PomParent) { diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go index 6f48e860d..bae7f7449 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml_test.go +++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go @@ -10,7 +10,54 @@ import ( "github.com/stretchr/testify/assert" ) -func Test_parsePomXML(t *testing.T) { +func Test_parserPomXML(t *testing.T) { + tests := []struct { + input string + expected []*pkg.Package + }{ + { + input: "test-fixtures/pom/pom.xml", + expected: []*pkg.Package{ + { + Name: "joda-time", + Version: "2.9.2", + FoundBy: javaPomCataloger, + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: 
pkg.JavaMetadataType, + Metadata: pkg.JavaMetadata{ + PURL: "pkg:maven/joda-time/joda-time@2.9.2", + }, + }, + { + Name: "junit", + Version: "4.12", + FoundBy: "java-pom-cataloger", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, + Metadata: pkg.JavaMetadata{ + PURL: "pkg:maven/junit/junit@4.12", + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + fixture, err := os.Open(test.input) + assert.NoError(t, err) + + actual, relationships, err := parserPomXML(fixture.Name(), fixture) + assert.NoError(t, err) + assert.Nil(t, relationships) + assert.Equal(t, test.expected, actual) + }) + } +} + +func Test_parsePomXMLProject(t *testing.T) { tests := []struct { expected pkg.PomProject }{ @@ -37,7 +84,7 @@ func Test_parsePomXML(t *testing.T) { fixture, err := os.Open(test.expected.Path) assert.NoError(t, err) - actual, err := parsePomXML(fixture.Name(), fixture) + actual, err := parsePomXMLProject(fixture.Name(), fixture) assert.NoError(t, err) assert.Equal(t, &test.expected, actual) diff --git a/syft/pkg/cataloger/java/pom_cataloger.go b/syft/pkg/cataloger/java/pom_cataloger.go new file mode 100644 index 000000000..753f5d2c3 --- /dev/null +++ b/syft/pkg/cataloger/java/pom_cataloger.go @@ -0,0 +1,17 @@ +package java + +import "github.com/anchore/syft/syft/pkg/cataloger/common" + +const javaPomCataloger = "java-pom-cataloger" + +// NewJavaPomCataloger returns a cataloger capable of parsing +// dependencies from a pom.xml file. +// Pom files list dependencies that may not be locally installed yet. 
+func NewJavaPomCataloger() *common.GenericCataloger { + globParsers := make(map[string]common.ParserFn) + + // java project files + globParsers[pomXMLDirGlob] = parserPomXML + + return common.NewGenericCataloger(nil, globParsers, javaPomCataloger) +} diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/pom.xml new file mode 100644 index 000000000..4ab76d1d9 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/pom/pom.xml @@ -0,0 +1,59 @@ + + + 4.0.0 + + org.anchore + example-java-app-maven + jar + 0.1.0 + + + 1.8 + 1.8 + + + + + + joda-time + joda-time + 2.9.2 + + + + + junit + junit + 4.12 + test + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.1 + + + package + + shade + + + + + hello.HelloWorld + + + + + + + + + + diff --git a/test/integration/catalog_packages_cases_test.go b/test/integration/catalog_packages_cases_test.go index 8b7f72435..74e0e64a0 100644 --- a/test/integration/catalog_packages_cases_test.go +++ b/test/integration/catalog_packages_cases_test.go @@ -60,6 +60,15 @@ var imageOnlyTestCases = []testCase{ "libc-utils": "0.7.2-r0", }, }, + { + name: "find java packages excluding pom.xml", // image scans can not include packages that have yet to be installed + pkgType: pkg.JavaPkg, + pkgLanguage: pkg.Java, + pkgInfo: map[string]string{ + "example-java-app-maven": "0.1.0", + "joda-time": "2.9.2", + }, + }, } var dirOnlyTestCases = []testCase{ @@ -218,6 +227,17 @@ var dirOnlyTestCases = []testCase{ "System.Runtime.CompilerServices.Unsafe": "6.0.0", }, }, + { + name: "find java packages including pom.xml", // directory scans can include packages that have yet to be installed + pkgType: pkg.JavaPkg, + pkgLanguage: pkg.Java, + duplicates: 1, // joda-time is included in both pom.xml AND the .jar collection + pkgInfo: map[string]string{ + "example-java-app-maven": "0.1.0", + "joda-time": "2.9.2", + "junit": "4.12", + }, + }, } var commonTestCases = []testCase{ @@ -244,15 +264,7 @@ 
var commonTestCases = []testCase{ "netbase": "5.4", }, }, - { - name: "find java packages", - pkgType: pkg.JavaPkg, - pkgLanguage: pkg.Java, - pkgInfo: map[string]string{ - "example-java-app-maven": "0.1.0", - "joda-time": "2.9.2", - }, - }, + { name: "find jenkins plugins", pkgType: pkg.JenkinsPluginPkg, diff --git a/test/integration/test-fixtures/image-pkg-coverage/pkgs/java/pom.xml b/test/integration/test-fixtures/image-pkg-coverage/pkgs/java/pom.xml new file mode 100644 index 000000000..4ab76d1d9 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/pkgs/java/pom.xml @@ -0,0 +1,59 @@ + + + 4.0.0 + + org.anchore + example-java-app-maven + jar + 0.1.0 + + + 1.8 + 1.8 + + + + + + joda-time + joda-time + 2.9.2 + + + + + junit + junit + 4.12 + test + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.1 + + + package + + shade + + + + + hello.HelloWorld + + + + + + + + + +