From 1230650771dd1cb2f6739623946f727a0f02331b Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 11:28:27 -0400 Subject: [PATCH 1/9] allow for java manifest data to be optional Signed-off-by: Alex Goodman --- syft/pkg/java_metadata.go | 18 +++++------ test/inline-compare/compare.py | 21 +++++-------- test/inline-compare/utils/inline.py | 22 ++++++++++---- test/inline-compare/utils/package.py | 2 +- test/inline-compare/utils/syft.py | 43 ++++++++++++++++++++++++++- test/inline-compare/utils/traverse.py | 21 +++++++++++++ 6 files changed, 98 insertions(+), 29 deletions(-) create mode 100644 test/inline-compare/utils/traverse.py diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index ad2a66079..3aa6fcb20 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -22,15 +22,15 @@ type PomProperties struct { // JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file. type JavaManifest struct { - Name string `mapstructure:"Name" json:"name"` - ManifestVersion string `mapstructure:"Manifest-Version" json:"manifestVersion"` - SpecTitle string `mapstructure:"Specification-Title" json:"specificationTitle"` - SpecVersion string `mapstructure:"Specification-Version" json:"specificationVersion"` - SpecVendor string `mapstructure:"Specification-Vendor" json:"specificationVendor"` - ImplTitle string `mapstructure:"Implementation-Title" json:"implementationTitle"` - ImplVersion string `mapstructure:"Implementation-Version" json:"implementationVersion"` - ImplVendor string `mapstructure:"Implementation-Vendor" json:"implementationVendor"` - Extra map[string]string `mapstructure:",remain" json:"extraFields"` + Name string `mapstructure:"Name" json:"name,omitempty"` + ManifestVersion string `mapstructure:"Manifest-Version" json:"manifestVersion,omitempty"` + SpecTitle string `mapstructure:"Specification-Title" json:"specificationTitle,omitempty"` + SpecVersion string `mapstructure:"Specification-Version" json:"specificationVersion,omitempty"` + SpecVendor string `mapstructure:"Specification-Vendor" json:"specificationVendor,omitempty"` + ImplTitle string `mapstructure:"Implementation-Title" json:"implementationTitle,omitempty"` + ImplVersion string `mapstructure:"Implementation-Version" json:"implementationVersion,omitempty"` + ImplVendor string `mapstructure:"Implementation-Vendor" json:"implementationVendor,omitempty"` + Extra map[string]string `mapstructure:",remain" json:"extraFields,omitempty"` Sections []map[string]string `json:"sections,omitempty"` } diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py index 2e0220221..a2f6224a4 100755 --- a/test/inline-compare/compare.py +++ b/test/inline-compare/compare.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os import sys +import difflib import collections import utils.package @@ -58,19 +59,13 @@ def report(analysis): if pkg not in analysis.syft_data.metadata[pkg.type]: continue syft_metadata_item = analysis.syft_data.metadata[pkg.type][pkg] - rows.append( - [ - INDENT, - "for:", - repr(pkg), - ":", - repr(syft_metadata_item), - "!=", - repr(metadata), - ] - ) - if rows: - print_rows(rows) + + diffs = difflib.ndiff([repr(syft_metadata_item)], [repr(metadata)]) + + print(INDENT + "for: " + repr(pkg)) + print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs))) + print() + else: print( INDENT, diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py index bf58c1510..1e9d77276 100644 --- a/test/inline-compare/utils/inline.py +++ b/test/inline-compare/utils/inline.py @@ -71,8 +71,20 @@ class InlineScan: type=pkg_type, ) packages.add(pkg) + + extra = dict(entry) + extra.pop('type') + extra.pop('maven-version') + for k, v in dict(extra).items(): + if v in ("", "N/A"): + extra[k] = None + + # temp temp temp + extra.pop("location") + metadata[pkg.type][pkg] = utils.package.Metadata( - version=entry["maven-version"] + version=entry["maven-version"], + extra=tuple(sorted(extra.items())), ) return packages, metadata @@ -86,7 +98,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) return packages, metadata @@ -101,7 +113,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) return packages, metadata @@ -114,7 +126,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) return packages, metadata @@ -126,6 +138,6 @@ class InlineScan: name=entry["package"], type=entry["type"].lower() ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) return packages, metadata diff --git a/test/inline-compare/utils/package.py b/test/inline-compare/utils/package.py index a6cb3353d..06d36b800 100644 --- a/test/inline-compare/utils/package.py +++ b/test/inline-compare/utils/package.py @@ -3,7 +3,7 @@ import collections import dataclasses from typing import Set, FrozenSet, Tuple, Any, List -Metadata = collections.namedtuple("Metadata", "version") +Metadata = collections.namedtuple("Metadata", "version extra") Package = collections.namedtuple("Package", "name type") Info = collections.namedtuple("Info", "packages metadata") diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py index b6e66b4d6..3bd1ac59c 100644 --- a/test/inline-compare/utils/syft.py +++ b/test/inline-compare/utils/syft.py @@ -4,6 +4,7 @@ import collections import utils.package import utils.image +from utils.traverse import dig class Syft: @@ -28,6 +29,8 @@ class Syft: metadata = collections.defaultdict(dict) for entry in self._enumerate_section(section="artifacts"): + extra = {} + # normalize to inline pkg_type = entry["type"].lower() if pkg_type in ("wheel", "egg", "python"): @@ -49,6 +52,44 @@ class Syft: ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) + + if "java" in pkg_type: + # lets match what inline scan expects to output + + path = dig(entry, "locations", 0, "path") + specVendor = dig(entry, "metadata", "manifest", "specificationVendor") + implVendor = dig(entry, "metadata", "manifest", "implementationVendor") + + specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None + implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None + + extra = { + "implementation-version": implVersion, + "specification-version": specVersion, + "origin": dig(entry, "metadata", "pomProperties", "groupId"), + "location": path, + "package": dig(entry, "metadata", "pomProperties", "artifactId"), + } + + if dig(entry, "metadata", "parentPackage"): + extra['origin'] = dig(entry, "metadata", "pomProperties", "groupId") + else: + # this is a nested package... + if specVendor: + extra['origin'] = specVendor + elif implVendor: + extra['origin'] = implVendor + + pomPath = dig(entry, "metadata", "pomProperties", "Path") + if path and pomPath: + extra["location"] = "%s:%s" % (path, pomPath), + + # temp temp temp + extra.pop("location") + + elif pkg_type == "apkg": + entry["version"] = entry["version"].split("-")[0] + + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple(sorted(extra.items()))) return utils.package.Info(packages=frozenset(packages), metadata=metadata) diff --git a/test/inline-compare/utils/traverse.py b/test/inline-compare/utils/traverse.py new file mode 100644 index 000000000..d98cf4ee7 --- /dev/null +++ b/test/inline-compare/utils/traverse.py @@ -0,0 +1,21 @@ + +def dig(target, *keys, **kwargs): + """ + Traverse a nested set of dictionaries, tuples, or lists similar to ruby's dig function. + """ + end_of_chain = target + for key in keys: + if isinstance(end_of_chain, dict) and key in end_of_chain: + end_of_chain = end_of_chain[key] + elif isinstance(end_of_chain, (list, tuple)) and isinstance(key, int): + end_of_chain = end_of_chain[key] + else: + if 'fail' in kwargs and kwargs['fail'] is True: + if isinstance(end_of_chain, dict): + raise KeyError + else: + raise IndexError + else: + return None + + return end_of_chain From fc991bc62e2c393bf062407022435e5184c74c20 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 12:40:49 -0400 Subject: [PATCH 2/9] partial java comparison with extra metadata Signed-off-by: Alex Goodman --- test/inline-compare/compare.py | 160 +++++++++++++++------------ test/inline-compare/utils/inline.py | 28 +++-- test/inline-compare/utils/package.py | 10 -- test/inline-compare/utils/syft.py | 69 +++++------- 4 files changed, 136 insertions(+), 131 deletions(-) diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py index a2f6224a4..6d963b624 100755 --- a/test/inline-compare/compare.py +++ b/test/inline-compare/compare.py @@ -9,9 +9,13 @@ from utils.format import Colors, print_rows from utils.inline import InlineScan from utils.syft import Syft -QUALITY_GATE_THRESHOLD = 0.95 +DEFAULT_QUALITY_GATE_THRESHOLD = 0.95 INDENT = " " -IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{}) + +PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{}) +METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{ + "anchore/test_images:java": 0.58, +}) # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%, # however additional functionality in grype is still being implemented, so this threshold may not be able to be met. @@ -19,10 +23,14 @@ IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{ # issues/enhancements are done we want to ensure that the lower threshold is bumped up to catch regression. The only way # to do this is to select an upper threshold for images with known threshold values, so we have a failure that # loudly indicates the lower threshold should be bumped. -IMAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) +PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) +METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{ + # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata + "anchore/test_images:java": 0.65, +}) -def report(analysis): +def report(image, analysis): if analysis.extra_packages: rows = [] print( @@ -48,7 +56,6 @@ def report(analysis): print() if analysis.missing_metadata: - rows = [] print( Colors.bold + "Syft mismatched metadata:", Colors.reset, @@ -62,7 +69,7 @@ def report(analysis): diffs = difflib.ndiff([repr(syft_metadata_item)], [repr(metadata)]) - print(INDENT + "for: " + repr(pkg)) + print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)") print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs))) print() @@ -73,42 +80,42 @@ def report(analysis): ) print() - if analysis.similar_missing_packages: - rows = [] - print( - Colors.bold + "Probably pairings of missing/extra packages:", - Colors.reset, - "to aid in troubleshooting missed/extra packages", - ) - for similar_packages in analysis.similar_missing_packages: - rows.append( - [ - INDENT, - repr(similar_packages.pkg), - "--->", - repr(similar_packages.missed), - ] - ) - print_rows(rows) - print() + # if analysis.similar_missing_packages: + # rows = [] + # print( + # Colors.bold + "Probably pairings of missing/extra packages:", + # Colors.reset, + # "to aid in troubleshooting missed/extra packages", + # ) + # for similar_packages in analysis.similar_missing_packages: + # rows.append( + # [ + # INDENT, + # repr(similar_packages.pkg), + # "--->", + # repr(similar_packages.missed), + # ] + # ) + # print_rows(rows) + # print() - if analysis.unmatched_missing_packages and analysis.extra_packages: - rows = [] - print( - Colors.bold + "Probably missed packages:", - Colors.reset, - "a probable pair was not found", - ) - for p in analysis.unmatched_missing_packages: - rows.append([INDENT, repr(p)]) - print_rows(rows) - print() + # if analysis.unmatched_missing_packages and analysis.extra_packages: + # rows = [] + # print( + # Colors.bold + "Probably missed packages:", + # Colors.reset, + # "a probable pair was not found", + # ) + # for p in analysis.unmatched_missing_packages: + # rows.append([INDENT, repr(p)]) + # print_rows(rows) + # print() - print(Colors.bold + "Summary:", Colors.reset) + print(Colors.bold + "Summary:", Colors.reset, image) print(" Inline Packages : %d" % len(analysis.inline_data.packages)) print(" Syft Packages : %d" % len(analysis.syft_data.packages)) print( - " (extra) : %d (note: this is ignored in the analysis!)" + " (extra) : %d (note: this is ignored by the quality gate!)" % len(analysis.extra_packages) ) print(" (missing) : %d" % len(analysis.missing_packages)) @@ -150,12 +157,37 @@ def report(analysis): ) ) - overall_score = ( - analysis.percent_overlapping_packages + analysis.percent_overlapping_metadata - ) / 2.0 - print(Colors.bold + " Overall Score: %2.1f %%" % overall_score, Colors.reset) +def enforce_quality_gate(title, actual_value, lower_gate_value, upper_gate_value): + if actual_value < lower_gate_value: + print( + Colors.bold + + " %s Quality Gate:\t" % title + + Colors.FG.red + + "FAIL (is not >= %d %%)" % lower_gate_value, + Colors.reset, + ) + return False + elif actual_value > upper_gate_value: + print( + Colors.bold + + " %s Quality Gate:\t" % title + + Colors.FG.orange + + "FAIL (lower threshold is artificially low and should be updated)", + Colors.reset, + ) + return False + + print( + Colors.bold + + " %s Quality Gate:\t" % title + + Colors.FG.green + + "Pass (>= %d %%)" % lower_gate_value, + Colors.reset, + ) + + return True def main(image): cwd = os.path.dirname(os.path.abspath(__file__)) @@ -170,41 +202,27 @@ def main(image): ) # show some useful report data for debugging / warm fuzzies - report(analysis) + report(image, analysis) # enforce a quality gate based on the comparison of package values and metadata values - upper_gate_value = IMAGE_UPPER_THRESHOLD[image] * 100 - lower_gate_value = IMAGE_QUALITY_GATE[image] * 100 - if analysis.quality_gate_score < lower_gate_value: - print( - Colors.bold - + " Quality Gate: " - + Colors.FG.red - + "FAILED (is not >= %d %%)\n" % lower_gate_value, - Colors.reset, - ) - return 1 - elif analysis.quality_gate_score > upper_gate_value: - print( - Colors.bold - + " Quality Gate: " - + Colors.FG.orange - + "FAILED (lower threshold is artificially low and should be updated)\n", - Colors.reset, - ) - return 1 - else: - print( - Colors.bold - + " Quality Gate: " - + Colors.FG.green - + "pass (>= %d %%)\n" % lower_gate_value, - Colors.reset, - ) + success = True + success &= enforce_quality_gate( + title="Package", + actual_value=analysis.percent_overlapping_packages, + lower_gate_value=PACKAGE_QUALITY_GATE[image] * 100, + upper_gate_value=PACKAGE_UPPER_THRESHOLD[image] * 100 + ) + success &= enforce_quality_gate( + title="Metadata", + actual_value=analysis.percent_overlapping_metadata, + lower_gate_value=METADATA_QUALITY_GATE[image] * 100, + upper_gate_value=METADATA_UPPER_THRESHOLD[image] * 100 + ) + if not success: + return 1 return 0 - if __name__ == "__main__": if len(sys.argv) != 2: sys.exit("provide an image") diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py index 1e9d77276..9bc118500 100644 --- a/test/inline-compare/utils/inline.py +++ b/test/inline-compare/utils/inline.py @@ -1,4 +1,5 @@ import os +import re import json import collections @@ -66,22 +67,27 @@ class InlineScan: elif pkg_type in ("java-jpi", "java-hpi"): pkg_type = "java-?pi" + # this would usually be "package" but this would not be able to account for duplicate dependencies in + # nested jars of the same name. Fallback to the package name if there is no given location + name = entry["location"] + + # replace fields with "N/A" with None + for k, v in dict(entry).items(): + if v in ("", "N/A"): + entry[k] = None + + extra = {} + # extra = dict(entry) + # extra.pop('type') + # extra.pop('maven-version') + # extra.pop("origin") # don't include origin as syft does not compact several fields into 1 + pkg = utils.package.Package( - name=entry["package"], + name=name, type=pkg_type, ) packages.add(pkg) - extra = dict(entry) - extra.pop('type') - extra.pop('maven-version') - for k, v in dict(extra).items(): - if v in ("", "N/A"): - extra[k] = None - - # temp temp temp - extra.pop("location") - metadata[pkg.type][pkg] = utils.package.Metadata( version=entry["maven-version"], extra=tuple(sorted(extra.items())), diff --git a/test/inline-compare/utils/package.py b/test/inline-compare/utils/package.py index 06d36b800..b4ac2c796 100644 --- a/test/inline-compare/utils/package.py +++ b/test/inline-compare/utils/package.py @@ -144,13 +144,3 @@ class Analysis: float(len(self.overlapping_packages) + len(self.similar_missing_packages)) / float(len(self.inline_data.packages)) ) * 100.0 - - @property - def quality_gate_score(self): - """ - The result of the analysis in the form of an aggregated percentage; it is up to the caller to use this value - and enforce a quality gate. - """ - return ( - self.percent_overlapping_packages + self.percent_overlapping_metadata - ) / 2.0 diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py index 3bd1ac59c..a9b95c825 100644 --- a/test/inline-compare/utils/syft.py +++ b/test/inline-compare/utils/syft.py @@ -46,50 +46,41 @@ class Syft: elif pkg_type in ("apk",): pkg_type = "apkg" + name = entry["name"] + version = entry["version"] + + if "java" in pkg_type: + # lets match what inline scan expects to output + + # specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None + # implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None + virtualPath = dig(entry, "metadata", "virtualPath") + + # package = dig(entry, "metadata", "pomProperties", "artifactId") + # if not package: + # package = "%s-%s" % (dig(entry, "name"), dig(entry, "version")) + # + # extra = { + # "implementation-version": implVersion, + # "specification-version": specVersion, + # "location": virtualPath, + # "package": package, + # } + + # we need to use the virtual path instead of the name to account for nested dependencies with the same + # package name (but potentially different metadata) + name = virtualPath + + elif pkg_type == "apkg": + version = entry["version"].split("-")[0] + pkg = utils.package.Package( - name=entry["name"], + name=name, type=pkg_type, ) packages.add(pkg) - if "java" in pkg_type: - # lets match what inline scan expects to output - - path = dig(entry, "locations", 0, "path") - specVendor = dig(entry, "metadata", "manifest", "specificationVendor") - implVendor = dig(entry, "metadata", "manifest", "implementationVendor") - - specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None - implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None - - extra = { - "implementation-version": implVersion, - "specification-version": specVersion, - "origin": dig(entry, "metadata", "pomProperties", "groupId"), - "location": path, - "package": dig(entry, "metadata", "pomProperties", "artifactId"), - } - - if dig(entry, "metadata", "parentPackage"): - extra['origin'] = dig(entry, "metadata", "pomProperties", "groupId") - else: - # this is a nested package... - if specVendor: - extra['origin'] = specVendor - elif implVendor: - extra['origin'] = implVendor - - pomPath = dig(entry, "metadata", "pomProperties", "Path") - if path and pomPath: - extra["location"] = "%s:%s" % (path, pomPath), - - # temp temp temp - extra.pop("location") - - elif pkg_type == "apkg": - entry["version"] = entry["version"].split("-")[0] - - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple(sorted(extra.items()))) + metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items()))) return utils.package.Info(packages=frozenset(packages), metadata=metadata) From ab45be98b839d569513205530f7e792f1913a8cd Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 13:52:34 -0400 Subject: [PATCH 3/9] append java nested package names to the virtual path Signed-off-by: Alex Goodman --- schema/json/schema.json | 21 +-------------------- syft/cataloger/java/archive_parser.go | 8 +++++++- syft/cataloger/java/archive_parser_test.go | 4 +++- syft/cataloger/java/java_manifest.go | 6 ------ syft/cataloger/java/java_manifest_test.go | 3 ++- 5 files changed, 13 insertions(+), 29 deletions(-) diff --git a/schema/json/schema.json b/schema/json/schema.json index b38874381..370f9c27d 100644 --- a/schema/json/schema.json +++ b/schema/json/schema.json @@ -194,38 +194,19 @@ "implementationTitle": { "type": "string" }, - "implementationVendor": { - "type": "string" - }, "implementationVersion": { "type": "string" }, "manifestVersion": { "type": "string" }, - "name": { - "type": "string" - }, "specificationTitle": { "type": "string" - }, - "specificationVendor": { - "type": "string" - }, - "specificationVersion": { - "type": "string" } }, "required": [ "extraFields", - "implementationTitle", - "implementationVendor", - "implementationVersion", - "manifestVersion", - "name", - "specificationTitle", - "specificationVendor", - "specificationVersion" + "manifestVersion" ], "type": "object" }, diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index 691bb5564..ca0f13dc2 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -177,6 +177,12 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ if propsObj.Version != "" && propsObj.ArtifactID != "" { // TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package + // keep the artifact name within the virtual path if this package does not match the parent package + vPathSuffix := "" + if !strings.HasPrefix(propsObj.ArtifactID, parentPkg.Name) { + vPathSuffix += ":" + propsObj.ArtifactID + } + // discovered props = new package p := pkg.Package{ Name: propsObj.ArtifactID, @@ -185,7 +191,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: j.virtualPath, + VirtualPath: j.virtualPath + vPathSuffix, PomProperties: propsObj, Parent: parentPkg, }, diff --git a/syft/cataloger/java/archive_parser_test.go b/syft/cataloger/java/archive_parser_test.go index c5f526bd7..e93374cfc 100644 --- a/syft/cataloger/java/archive_parser_test.go +++ b/syft/cataloger/java/archive_parser_test.go @@ -236,7 +236,9 @@ func TestParseJar(t *testing.T) { Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar", + // ensure that nested packages with different names than that of the parent are appended as + // a suffix on the virtual path + VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar:joda-time", PomProperties: &pkg.PomProperties{ Path: "META-INF/maven/joda-time/joda-time/pom.properties", GroupID: "joda-time", diff --git a/syft/cataloger/java/java_manifest.go b/syft/cataloger/java/java_manifest.go index 02c0f6031..bb08062fe 100644 --- a/syft/cataloger/java/java_manifest.go +++ b/syft/cataloger/java/java_manifest.go @@ -72,12 +72,6 @@ func parseJavaManifest(reader io.Reader) (*pkg.JavaManifest, error) { manifest.Sections = sections[1:] } - // clean select fields - if strings.Trim(manifest.ImplVersion, " ") != "" { - // transform versions with dates attached to just versions (e.g. "1.3 2244 October 5 2008" --> "1.3") - manifest.ImplVersion = strings.Split(manifest.ImplVersion, " ")[0] - } - return &manifest, nil } diff --git a/syft/cataloger/java/java_manifest_test.go b/syft/cataloger/java/java_manifest_test.go index ad80b4871..00e2560bf 100644 --- a/syft/cataloger/java/java_manifest_test.go +++ b/syft/cataloger/java/java_manifest_test.go @@ -62,10 +62,11 @@ func TestParseJavaManifest(t *testing.T) { }, }, { + // regression test, we should always keep the full version fixture: "test-fixtures/manifest/version-with-date", expected: pkg.JavaManifest{ ManifestVersion: "1.0", - ImplVersion: "1.3", // ensure the date is stripped off during processing + ImplVersion: "1.3 2244 October 5 2005", }, }, } From a4f22e65fc670f3d921c8a8c037d6334cc0f6f95 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 13:52:51 -0400 Subject: [PATCH 4/9] expand compare testing images Signed-off-by: Alex Goodman --- test/inline-compare/compare-all.sh | 3 +- test/inline-compare/compare.py | 73 +++++++++++++++-------------- test/inline-compare/utils/inline.py | 8 +--- test/inline-compare/utils/syft.py | 25 ++-------- 4 files changed, 45 insertions(+), 64 deletions(-) diff --git a/test/inline-compare/compare-all.sh b/test/inline-compare/compare-all.sh index 85a8e9e20..146afe9b2 100755 --- a/test/inline-compare/compare-all.sh +++ b/test/inline-compare/compare-all.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash set -eu -# TODO: Add "alpine:3.12.0" back in when we've figured out how to handle the apk version field w/ and w/o release information (see issue: https://github.com/anchore/syft/pull/195) -images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1") +images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1" "alpine:3.12.0" "anchore/test_images:java" "anchore/test_images:py38" "anchore/anchore-engine:v0.8.2" "jenkins/jenkins:2.249.2-lts-jdk11" ) # gather all image analyses for img in "${images[@]}"; do diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py index 6d963b624..819f80756 100755 --- a/test/inline-compare/compare.py +++ b/test/inline-compare/compare.py @@ -14,7 +14,9 @@ INDENT = " " PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{}) METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{ - "anchore/test_images:java": 0.58, + # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata + "anchore/test_images:java": 0.52, + "jenkins/jenkins:2.249.2-lts-jdk11": 0.82, }) # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%, @@ -26,7 +28,8 @@ METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THR PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{ # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata - "anchore/test_images:java": 0.65, + "anchore/test_images:java": 0.54, + "jenkins/jenkins:2.249.2-lts-jdk11": 0.84, }) @@ -71,45 +74,45 @@ def report(image, analysis): print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)") print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs))) - print() - else: + if not analysis.missing_metadata: print( INDENT, - "There are mismatches, but only due to packages Syft did not find (but inline did).", + "There are mismatches, but only due to packages Syft did not find (but inline did).\n", ) + + if analysis.similar_missing_packages: + rows = [] + print( + Colors.bold + "Probably pairings of missing/extra packages:", + Colors.reset, + "to aid in troubleshooting missed/extra packages", + ) + for similar_packages in analysis.similar_missing_packages: + rows.append( + [ + INDENT, + repr(similar_packages.pkg), + "--->", + repr(similar_packages.missed), + ] + ) + print_rows(rows) print() - # if analysis.similar_missing_packages: - # rows = [] - # print( - # Colors.bold + "Probably pairings of missing/extra packages:", - # Colors.reset, - # "to aid in troubleshooting missed/extra packages", - # ) - # for similar_packages in analysis.similar_missing_packages: - # rows.append( - # [ - # INDENT, - # repr(similar_packages.pkg), - # "--->", - # repr(similar_packages.missed), - # ] - # ) - # print_rows(rows) - # print() + show_probable_mismatches = analysis.unmatched_missing_packages and analysis.extra_packages and len(analysis.unmatched_missing_packages) != len(analysis.missing_packages) - # if analysis.unmatched_missing_packages and analysis.extra_packages: - # rows = [] - # print( - # Colors.bold + "Probably missed packages:", - # Colors.reset, - # "a probable pair was not found", - # ) - # for p in analysis.unmatched_missing_packages: - # rows.append([INDENT, repr(p)]) - # print_rows(rows) - # print() + if show_probable_mismatches: + rows = [] + print( + Colors.bold + "Probably missed packages:", + Colors.reset, + "a probable pair was not found", + ) + for p in analysis.unmatched_missing_packages: + rows.append([INDENT, repr(p)]) + print_rows(rows) + print() print(Colors.bold + "Summary:", Colors.reset, image) print(" Inline Packages : %d" % len(analysis.inline_data.packages)) @@ -121,7 +124,7 @@ def report(image, analysis): print(" (missing) : %d" % len(analysis.missing_packages)) print() - if analysis.unmatched_missing_packages and analysis.extra_packages: + if show_probable_mismatches: print( " Probable Package Matches : %d (matches not made, but were probably found by both Inline and Syft)" % len(analysis.similar_missing_packages) diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py index 9bc118500..11637cf9a 100644 --- a/test/inline-compare/utils/inline.py +++ b/test/inline-compare/utils/inline.py @@ -76,12 +76,6 @@ class InlineScan: if v in ("", "N/A"): entry[k] = None - extra = {} - # extra = dict(entry) - # extra.pop('type') - # extra.pop('maven-version') - # extra.pop("origin") # don't include origin as syft does not compact several fields into 1 - pkg = utils.package.Package( name=name, type=pkg_type, @@ -90,7 +84,7 @@ class InlineScan: metadata[pkg.type][pkg] = utils.package.Metadata( version=entry["maven-version"], - extra=tuple(sorted(extra.items())), + extra=tuple(), ) return packages, metadata diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py index a9b95c825..52b5c7899 100644 --- a/test/inline-compare/utils/syft.py +++ b/test/inline-compare/utils/syft.py @@ -50,29 +50,14 @@ class Syft: version = entry["version"] if "java" in pkg_type: - # lets match what inline scan expects to output - - # specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None - # implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None - virtualPath = dig(entry, "metadata", "virtualPath") - - # package = dig(entry, "metadata", "pomProperties", "artifactId") - # if not package: - # package = "%s-%s" % (dig(entry, "name"), dig(entry, "version")) - # - # extra = { - # "implementation-version": implVersion, - # "specification-version": specVersion, - # "location": virtualPath, - # "package": package, - # } - # we need to use the virtual path instead of the name to account for nested dependencies with the same # package name (but potentially different metadata) - name = virtualPath + name = dig(entry, "metadata", "virtualPath") elif pkg_type == "apkg": - version = entry["version"].split("-")[0] + # inline scan strips off the release from the version, which should be normalized here + fields = entry["version"].split("-") + version = "-".join(fields[:-1]) pkg = utils.package.Package( name=name, @@ -81,6 +66,6 @@ class Syft: packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items()))) + metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple()) return utils.package.Info(packages=frozenset(packages), metadata=metadata) From a5cba13ddfb2e7f3c433da97d2747bf9acf8e10c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 16:26:04 -0400 Subject: [PATCH 5/9] enable more flexible java manifest structure (closer to the spec) Signed-off-by: Alex Goodman --- schema/json/schema.json | 32 ++++----- syft/cataloger/java/archive_parser.go | 2 +- syft/cataloger/java/archive_parser_test.go | 41 ++++++----- syft/cataloger/java/java_manifest.go | 70 +++++++++++-------- syft/cataloger/java/java_manifest_test.go | 46 ++++++------ .../java/test-fixtures/manifest/extra-info | 1 + syft/pkg/java_metadata.go | 12 +--- 7 files changed, 110 insertions(+), 94 deletions(-) diff --git a/schema/json/schema.json b/schema/json/schema.json index 370f9c27d..a908a335e 100644 --- a/schema/json/schema.json +++ b/schema/json/schema.json @@ -126,7 +126,7 @@ }, "manifest": { "properties": { - "extraFields": { + "main": { "properties": { "Archiver-Version": { "type": "string" @@ -149,6 +149,12 @@ "Hudson-Version": { "type": "string" }, + "Implementation-Title": { + "type": "string" + }, + "Implementation-Version": { + "type": "string" + }, "Jenkins-Version": { "type": "string" }, @@ -158,6 +164,9 @@ "Main-Class": { "type": "string" }, + "Manifest-Version": { + "type": "string" + }, "Minimum-Java-Version": { "type": "string" }, @@ -181,32 +190,23 @@ }, "Short-Name": { "type": "string" + }, + "Specification-Title": { + "type": "string" } }, "required": [ "Archiver-Version", "Build-Jdk", "Built-By", - "Created-By" + "Created-By", + "Manifest-Version" ], "type": "object" - }, - "implementationTitle": { - "type": "string" - }, - "implementationVersion": { - "type": "string" - }, - "manifestVersion": { - "type": "string" - }, - "specificationTitle": { - "type": "string" } }, "required": [ - "extraFields", - "manifestVersion" + "main" ], "type": "object" }, diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index ca0f13dc2..fa9e619cd 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -136,7 +136,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { // parse the manifest file into a rich object manifestContents := contents[manifestMatches[0]] - manifest, err := parseJavaManifest(strings.NewReader(manifestContents)) + manifest, err := parseJavaManifest(j.archivePath, strings.NewReader(manifestContents)) if err != nil { return nil, fmt.Errorf("failed to parse java manifest (%s): %w", j.virtualPath, err) } diff --git a/syft/cataloger/java/archive_parser_test.go b/syft/cataloger/java/archive_parser_test.go index e93374cfc..9da709902 100644 --- a/syft/cataloger/java/archive_parser_test.go +++ b/syft/cataloger/java/archive_parser_test.go @@ -89,18 +89,19 @@ func TestSelectName(t *testing.T) { desc: "name from Implementation-Title", archive: archiveFilename{}, manifest: pkg.JavaManifest{ - Name: "", - SpecTitle: "", - ImplTitle: "maven-wrapper", + Main: map[string]string{ + "Implementation-Title": "maven-wrapper", + }, }, expected: "maven-wrapper", }, { desc: "Implementation-Title does not override", manifest: pkg.JavaManifest{ - Name: "Foo", - SpecTitle: "", - ImplTitle: "maven-wrapper", + Main: map[string]string{ + "Name": "foo", + "Implementation-Title": "maven-wrapper", + }, }, archive: archiveFilename{ fields: []map[string]string{ @@ -145,11 +146,12 @@ func TestParseJar(t *testing.T) { Metadata: pkg.JavaMetadata{ VirtualPath: "test-fixtures/java-builds/packages/example-jenkins-plugin.hpi", Manifest: &pkg.JavaManifest{ - ManifestVersion: "1.0", - SpecTitle: "The Jenkins Plugins Parent POM Project", - ImplTitle: "example-jenkins-plugin", - ImplVersion: "1.0-SNAPSHOT", - Extra: map[string]string{ + Main: map[string]string{ + "Manifest-Version": "1.0", + "Specification-Title": "The Jenkins Plugins Parent POM Project", + "Implementation-Title": "example-jenkins-plugin", + "Implementation-Version": "1.0-SNAPSHOT", + // extra fields... "Archiver-Version": "Plexus Archiver", "Plugin-License-Url": "https://opensource.org/licenses/MIT", "Plugin-License-Name": "MIT License", @@ -191,7 +193,9 @@ func TestParseJar(t *testing.T) { Metadata: pkg.JavaMetadata{ VirtualPath: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar", Manifest: &pkg.JavaManifest{ - ManifestVersion: "1.0", + Main: map[string]string{ + "Manifest-Version": "1.0", + }, }, }, }, @@ -212,8 +216,9 @@ func TestParseJar(t *testing.T) { Metadata: pkg.JavaMetadata{ VirtualPath: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar", Manifest: &pkg.JavaManifest{ - ManifestVersion: "1.0", - Extra: map[string]string{ + Main: map[string]string{ + "Manifest-Version": "1.0", + // extra fields... "Archiver-Version": "Plexus Archiver", "Created-By": "Apache Maven 3.6.3", "Built-By": "?", @@ -305,11 +310,11 @@ func TestParseJar(t *testing.T) { metadata := a.Metadata.(pkg.JavaMetadata) metadata.Parent = nil - // ignore select fields + // ignore select fields (only works for the main section) for _, field := range test.ignoreExtras { - if metadata.Manifest != nil && metadata.Manifest.Extra != nil { - if _, ok := metadata.Manifest.Extra[field]; ok { - delete(metadata.Manifest.Extra, field) + if metadata.Manifest != nil && metadata.Manifest.Main != nil { + if _, ok := metadata.Manifest.Main[field]; ok { + delete(metadata.Manifest.Main, field) } } } diff --git a/syft/cataloger/java/java_manifest.go b/syft/cataloger/java/java_manifest.go index bb08062fe..c4ca715e2 100644 --- a/syft/cataloger/java/java_manifest.go +++ b/syft/cataloger/java/java_manifest.go @@ -4,16 +4,20 @@ import ( "bufio" "fmt" "io" + "strconv" "strings" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/pkg" - "github.com/mitchellh/mapstructure" ) const manifestGlob = "/META-INF/MANIFEST.MF" // nolint:funlen -func parseJavaManifest(reader io.Reader) (*pkg.JavaManifest, error) { +// parseJavaManifest takes MANIFEST.MF file content and returns sections of parsed key/value pairs. +// For more information: https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest +func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) { var manifest pkg.JavaManifest sections := []map[string]string{ make(map[string]string), @@ -63,13 +67,24 @@ func parseJavaManifest(reader io.Reader) (*pkg.JavaManifest, error) { return nil, fmt.Errorf("unable to read java manifest: %w", err) } - if err := mapstructure.Decode(sections[0], &manifest); err != nil { - return nil, fmt.Errorf("unable to parse java manifest: %w", err) - } - - // append on extra sections - if len(sections) > 1 { - manifest.Sections = sections[1:] + if len(sections) > 0 { + manifest.Main = sections[0] + if len(sections) > 1 { + manifest.Sections = make(map[string]map[string]string) + for i, s := range sections[1:] { + name, ok := s["Name"] + if !ok { + // per the manifest spec (https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest) + // this should never happen. If it does we want to know about it, but not necessarily stop + // cataloging entirely... for this reason we only log. + log.Errorf("java manifest section found without a name: %s", path) + name = strconv.Itoa(i) + } else { + delete(s, "Name") + } + manifest.Sections[name] = s + } + } } return &manifest, nil @@ -80,24 +95,21 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string switch { case filenameObj.name() != "": name = filenameObj.name() - case manifest.Name != "": + case manifest.Main["Name"] != "": // Manifest original spec... - name = manifest.Name - case manifest.Extra["Bundle-Name"] != "": + name = manifest.Main["Name"] + case manifest.Main["Bundle-Name"] != "": // BND tooling... - name = manifest.Extra["Bundle-Name"] - case manifest.Extra["Short-Name"] != "": + name = manifest.Main["Bundle-Name"] + case manifest.Main["Short-Name"] != "": // Jenkins... - name = manifest.Extra["Short-Name"] - case manifest.Extra["Extension-Name"] != "": + name = manifest.Main["Short-Name"] + case manifest.Main["Extension-Name"] != "": // Jenkins... - name = manifest.Extra["Extension-Name"] - } - - // in situations where we hit this point and no name was - // determined, look at the Implementation-Title - if name == "" && manifest.ImplTitle != "" { - name = manifest.ImplTitle + name = manifest.Main["Extension-Name"] + case manifest.Main["Implementation-Title"] != "": + // last ditch effort... + name = manifest.Main["Implementation-Title"] } return name } @@ -105,14 +117,14 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { var version string switch { - case manifest.ImplVersion != "": - version = manifest.ImplVersion + case manifest.Main["Implementation-Version"] != "": + version = manifest.Main["Implementation-Version"] case filenameObj.version() != "": version = filenameObj.version() - case manifest.SpecVersion != "": - version = manifest.SpecVersion - case manifest.Extra["Plugin-Version"] != "": - version = manifest.Extra["Plugin-Version"] + case manifest.Main["Specification-Version"] != "": + version = manifest.Main["Specification-Version"] + case manifest.Main["Plugin-Version"] != "": + version = manifest.Main["Plugin-Version"] } return version } diff --git a/syft/cataloger/java/java_manifest_test.go b/syft/cataloger/java/java_manifest_test.go index 00e2560bf..fd6d096fb 100644 --- a/syft/cataloger/java/java_manifest_test.go +++ b/syft/cataloger/java/java_manifest_test.go @@ -17,35 +17,39 @@ func TestParseJavaManifest(t *testing.T) { { fixture: "test-fixtures/manifest/small", expected: pkg.JavaManifest{ - ManifestVersion: "1.0", + Main: map[string]string{ + "Manifest-Version": "1.0", + }, }, }, { fixture: "test-fixtures/manifest/standard-info", expected: pkg.JavaManifest{ - ManifestVersion: "1.0", - Name: "the-best-name", - SpecTitle: "the-spec-title", - SpecVersion: "the-spec-version", - SpecVendor: "the-spec-vendor", - ImplTitle: "the-impl-title", - ImplVersion: "the-impl-version", - ImplVendor: "the-impl-vendor", + Main: map[string]string{ + "Name": "the-best-name", + "Manifest-Version": "1.0", + "Specification-Title": "the-spec-title", + "Specification-Version": "the-spec-version", + "Specification-Vendor": "the-spec-vendor", + "Implementation-Title": "the-impl-title", + "Implementation-Version": "the-impl-version", + "Implementation-Vendor": "the-impl-vendor", + }, }, }, { fixture: "test-fixtures/manifest/extra-info", expected: pkg.JavaManifest{ - ManifestVersion: "1.0", - Extra: map[string]string{ + Main: map[string]string{ + "Manifest-Version": "1.0", "Archiver-Version": "Plexus Archiver", "Created-By": "Apache Maven 3.6.3", }, - Sections: []map[string]string{ - { + Sections: map[string]map[string]string{ + "thing-1": { "Built-By": "?", }, - { + "2": { "Build-Jdk": "14.0.1", "Main-Class": "hello.HelloWorld", }, @@ -55,9 +59,9 @@ func TestParseJavaManifest(t *testing.T) { { fixture: "test-fixtures/manifest/continuation", expected: pkg.JavaManifest{ - ManifestVersion: "1.0", - Extra: map[string]string{ - "Plugin-ScmUrl": "https://github.com/jenkinsci/plugin-pom/example-jenkins-plugin", + Main: map[string]string{ + "Manifest-Version": "1.0", + "Plugin-ScmUrl": "https://github.com/jenkinsci/plugin-pom/example-jenkins-plugin", }, }, }, @@ -65,8 +69,10 @@ func TestParseJavaManifest(t *testing.T) { // regression test, we should always keep the full version fixture: "test-fixtures/manifest/version-with-date", expected: pkg.JavaManifest{ - ManifestVersion: "1.0", - ImplVersion: "1.3 2244 October 5 2005", + Main: map[string]string{ + "Manifest-Version": "1.0", + "Implementation-Version": "1.3 2244 October 5 2005", + }, }, }, } @@ -78,7 +84,7 @@ func TestParseJavaManifest(t *testing.T) { t.Fatalf("could not open fixture: %+v", err) } - actual, err := parseJavaManifest(fixture) + actual, err := parseJavaManifest(test.fixture, fixture) if err != nil { t.Fatalf("failed to parse manifest: %+v", err) } diff --git a/syft/cataloger/java/test-fixtures/manifest/extra-info b/syft/cataloger/java/test-fixtures/manifest/extra-info index 8938f487c..c1fa40e5c 100644 --- a/syft/cataloger/java/test-fixtures/manifest/extra-info +++ b/syft/cataloger/java/test-fixtures/manifest/extra-info @@ -2,6 +2,7 @@ Manifest-Version: 1.0 Archiver-Version: Plexus Archiver Created-By: Apache Maven 3.6.3 +Name: thing-1 Built-By: ? Build-Jdk: 14.0.1 diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index 3aa6fcb20..a4a9c7578 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -22,16 +22,8 @@ type PomProperties struct { // JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file. type JavaManifest struct { - Name string `mapstructure:"Name" json:"name,omitempty"` - ManifestVersion string `mapstructure:"Manifest-Version" json:"manifestVersion,omitempty"` - SpecTitle string `mapstructure:"Specification-Title" json:"specificationTitle,omitempty"` - SpecVersion string `mapstructure:"Specification-Version" json:"specificationVersion,omitempty"` - SpecVendor string `mapstructure:"Specification-Vendor" json:"specificationVendor,omitempty"` - ImplTitle string `mapstructure:"Implementation-Title" json:"implementationTitle,omitempty"` - ImplVersion string `mapstructure:"Implementation-Version" json:"implementationVersion,omitempty"` - ImplVendor string `mapstructure:"Implementation-Vendor" json:"implementationVendor,omitempty"` - Extra map[string]string `mapstructure:",remain" json:"extraFields,omitempty"` - Sections []map[string]string `json:"sections,omitempty"` + Main map[string]string `json:"main,omitempty"` + Sections map[string]map[string]string `json:"sections,omitempty"` } func (m JavaMetadata) PackageURL() string { From 03dbfb8dfb420bcaa31d9a29b7198f630570dabc Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 30 Oct 2020 08:12:25 -0400 Subject: [PATCH 6/9] improve java name and version extraction as well as parent pkg pairing Signed-off-by: Alex Goodman --- syft/cataloger/java/archive_filename.go | 23 ++++--- syft/cataloger/java/archive_filename_test.go | 69 +++++++++++++++++++- syft/cataloger/java/archive_parser.go | 25 +++++-- syft/cataloger/java/java_manifest.go | 8 +-- syft/cataloger/java/java_manifest_test.go | 2 +- syft/pkg/java_metadata.go | 4 +- 6 files changed, 107 insertions(+), 24 deletions(-) diff --git a/syft/cataloger/java/archive_filename.go b/syft/cataloger/java/archive_filename.go index 0f50d99c5..85c4621cd 100644 --- a/syft/cataloger/java/archive_filename.go +++ b/syft/cataloger/java/archive_filename.go @@ -10,11 +10,20 @@ import ( "github.com/anchore/syft/syft/pkg" ) +// match on versions and anything after the version. This is used to isolate the name from the version. +// match examples: +// wagon-webdav-1.0.2-rc1-hudson.jar ---> -1.0.2-rc1-hudson.jar +// windows-remote-command-1.0.jar ---> -1.0.jar +// wstx-asl-1-2.jar ---> -1-2.jar +// guava-rc0.jar ---> -rc0.jar +var versionAreaPattern = regexp.MustCompile(`-(?P(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)(?P.*)$`) + +// match on explicit versions. This is used for extracting version information. // match examples: // pkg-extra-field-4.3.2-rc1 --> match(name=pkg-extra-field version=4.3.2-rc1) // pkg-extra-field-4.3-rc1 --> match(name=pkg-extra-field version=4.3-rc1) // pkg-extra-field-4.3 --> match(name=pkg-extra-field version=4.3) -var versionPattern = regexp.MustCompile(`(?P.+)-(?P(\d+\.)?(\d+\.)?(\*|\d+)(-[a-zA-Z0-9\-\.]+)*)`) +var versionPattern = regexp.MustCompile(`-(?P(\d+\.)?(\d+\.)?(r?c?\d+)(-[a-zA-Z0-9\-.]+)*)`) type archiveFilename struct { raw string @@ -70,14 +79,8 @@ func (a archiveFilename) version() string { } func (a archiveFilename) name() string { - for _, fieldSet := range a.fields { - if name, ok := fieldSet["name"]; ok { - // return the first name - return name - } - } - - // derive the name from the archive name (no path or extension) + // derive the name from the archive name (no path or extension) and remove any versions found basename := filepath.Base(a.raw) - return strings.TrimSuffix(basename, filepath.Ext(basename)) + cleaned := strings.TrimSuffix(basename, filepath.Ext(basename)) + return versionAreaPattern.ReplaceAllString(cleaned, "") } diff --git a/syft/cataloger/java/archive_filename_test.go b/syft/cataloger/java/archive_filename_test.go index db79e61d9..553ce9c41 100644 --- a/syft/cataloger/java/archive_filename_test.go +++ b/syft/cataloger/java/archive_filename_test.go @@ -1,9 +1,10 @@ package java import ( + "testing" + "github.com/anchore/syft/syft/pkg" "github.com/sergi/go-diff/diffmatchpatch" - "testing" ) func TestExtractInfoFromJavaArchiveFilename(t *testing.T) { @@ -56,12 +57,78 @@ func TestExtractInfoFromJavaArchiveFilename(t *testing.T) { name: "pkg-extra-field-maven", ty: pkg.JenkinsPluginPkg, }, + { + filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3a-hudson.jar", + version: "1.0.2-beta-2.2.3a-hudson", + extension: "jar", + name: "wagon-webdav", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/wagon-webdav-1.0.2-beta-2.2.3-hudson.jar", + version: "1.0.2-beta-2.2.3-hudson", + extension: "jar", + name: "wagon-webdav", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/windows-remote-command-1.0.jar", + version: "1.0", + extension: "jar", + name: "windows-remote-command", + ty: pkg.JavaPkg, + }, + { + filename: "/some/path-with-version-5.4.3/wagon-http-lightweight-1.0.5-beta-2.jar", + version: "1.0.5-beta-2", + extension: "jar", + name: "wagon-http-lightweight", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/commons-jelly-1.1-hudson-20100305.jar", + version: "1.1-hudson-20100305", + extension: "jar", + name: "commons-jelly", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/jtidy-4aug2000r7-dev-hudson-1.jar", + // I don't see how we can reliably account for this case + //version: "4aug2000r7-dev-hudson-1", + version: "", + extension: "jar", + name: "jtidy", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/trilead-ssh2-build212-hudson-5.jar", + // I don't see how we can reliably account for this case + //version: "build212-hudson-5", + version: "5", + extension: "jar", + // name: "trilead-ssh2", + name: "trilead-ssh2-build212-hudson", + ty: pkg.JavaPkg, + }, + { + filename: "/hudson.war:WEB-INF/lib/guava-r06.jar", + version: "r06", + extension: "jar", + name: "guava", + ty: pkg.JavaPkg, + }, } for _, test := range tests { t.Run(test.filename, func(t *testing.T) { obj := newJavaArchiveFilename(test.filename) + ty := obj.pkgType() + if ty != test.ty { + t.Errorf("mismatched type: %+v != %v", ty, test.ty) + } + version := obj.version() if version != test.version { dmp := diffmatchpatch.New() diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index fa9e619cd..72f0ff79b 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -66,13 +66,17 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) } + // fetch the last element of the virtual path + virtualElements := strings.Split(virtualPath, ":") + currentFilepath := virtualElements[len(virtualElements)-1] + return &archiveParser{ discoveredPkgs: internal.NewStringSet(), fileManifest: fileManifest, virtualPath: virtualPath, archivePath: archivePath, contentPath: contentPath, - fileInfo: newJavaArchiveFilename(virtualPath), + fileInfo: newJavaArchiveFilename(currentFilepath), detectNested: detectNested, }, cleanupFn, nil } @@ -182,6 +186,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ if !strings.HasPrefix(propsObj.ArtifactID, parentPkg.Name) { vPathSuffix += ":" + propsObj.ArtifactID } + virtualPath := j.virtualPath + vPathSuffix // discovered props = new package p := pkg.Package{ @@ -191,7 +196,7 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ Type: pkg.JavaPkg, MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ - VirtualPath: j.virtualPath + vPathSuffix, + VirtualPath: virtualPath, PomProperties: propsObj, Parent: parentPkg, }, @@ -199,16 +204,24 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ pkgKey := uniquePkgKey(&p) - if !j.discoveredPkgs.Contains(pkgKey) { - // only keep packages we haven't seen yet - pkgs = append(pkgs, p) - } else if pkgKey == parentKey { + if pkgKey == parentKey || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath || len(contents) == 1 { // we've run across more information about our parent package, add this info to the parent package metadata + // the pom properties is typically a better source of information for name and version than the manifest + if p.Name != parentPkg.Name { + parentPkg.Name = p.Name + } + if p.Version != parentPkg.Version { + parentPkg.Version = p.Version + } + parentMetadata, ok := parentPkg.Metadata.(pkg.JavaMetadata) if ok { parentMetadata.PomProperties = propsObj parentPkg.Metadata = parentMetadata } + } else if !j.discoveredPkgs.Contains(pkgKey) { + // only keep packages we haven't seen yet (and are not related to the parent package) + pkgs = append(pkgs, p) } } } diff --git a/syft/cataloger/java/java_manifest.go b/syft/cataloger/java/java_manifest.go index c4ca715e2..2eaf8aac2 100644 --- a/syft/cataloger/java/java_manifest.go +++ b/syft/cataloger/java/java_manifest.go @@ -70,7 +70,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) if len(sections) > 0 { manifest.Main = sections[0] if len(sections) > 1 { - manifest.Sections = make(map[string]map[string]string) + manifest.NamedSections = make(map[string]map[string]string) for i, s := range sections[1:] { name, ok := s["Name"] if !ok { @@ -82,7 +82,7 @@ func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) } else { delete(s, "Name") } - manifest.Sections[name] = s + manifest.NamedSections[name] = s } } } @@ -117,10 +117,10 @@ func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { var version string switch { - case manifest.Main["Implementation-Version"] != "": - version = manifest.Main["Implementation-Version"] case filenameObj.version() != "": version = filenameObj.version() + case manifest.Main["Implementation-Version"] != "": + version = manifest.Main["Implementation-Version"] case manifest.Main["Specification-Version"] != "": version = manifest.Main["Specification-Version"] case manifest.Main["Plugin-Version"] != "": diff --git a/syft/cataloger/java/java_manifest_test.go b/syft/cataloger/java/java_manifest_test.go index fd6d096fb..727346dc6 100644 --- a/syft/cataloger/java/java_manifest_test.go +++ b/syft/cataloger/java/java_manifest_test.go @@ -45,7 +45,7 @@ func TestParseJavaManifest(t *testing.T) { "Archiver-Version": "Plexus Archiver", "Created-By": "Apache Maven 3.6.3", }, - Sections: map[string]map[string]string{ + NamedSections: map[string]map[string]string{ "thing-1": { "Built-By": "?", }, diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index a4a9c7578..d4852eb55 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -22,8 +22,8 @@ type PomProperties struct { // JavaManifest represents the fields of interest extracted from a Java archive's META-INF/MANIFEST.MF file. type JavaManifest struct { - Main map[string]string `json:"main,omitempty"` - Sections map[string]map[string]string `json:"sections,omitempty"` + Main map[string]string `json:"main,omitempty"` + NamedSections map[string]map[string]string `json:"namedSections,omitempty"` } func (m JavaMetadata) PackageURL() string { From 2532928afaa4edb403d1e70741c07cf3e6a3fa67 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 30 Oct 2020 10:06:47 -0400 Subject: [PATCH 7/9] bump java compare testing thresholds Signed-off-by: Alex Goodman --- test/inline-compare/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py index 819f80756..052b3f3b5 100755 --- a/test/inline-compare/compare.py +++ b/test/inline-compare/compare.py @@ -15,7 +15,7 @@ INDENT = " " PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{}) METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{ # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata - "anchore/test_images:java": 0.52, + "anchore/test_images:java": 0.61, "jenkins/jenkins:2.249.2-lts-jdk11": 0.82, }) @@ -28,7 +28,7 @@ METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THR PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{ # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata - "anchore/test_images:java": 0.54, + "anchore/test_images:java": 0.65, "jenkins/jenkins:2.249.2-lts-jdk11": 0.84, }) From 232cd130355077cd322aa369b304098fe35da932 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 30 Oct 2020 10:37:34 -0400 Subject: [PATCH 8/9] update tests for enhanced java pkg pairings Signed-off-by: Alex Goodman --- syft/cataloger/java/archive_parser.go | 14 ++- syft/cataloger/java/archive_parser_test.go | 110 +++++++-------------- syft/cataloger/java/java_manifest_test.go | 44 ++++++++- 3 files changed, 94 insertions(+), 74 deletions(-) diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index 72f0ff79b..e8de3414d 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -160,6 +160,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { // discoverPkgsFromPomProperties parses Maven POM properties for a given parent package, returning all listed Java packages found and // associating each discovered package to the given parent package. +// nolint:funlen,gocognit func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([]pkg.Package, error) { var pkgs = make([]pkg.Package, 0) parentKey := uniquePkgKey(parentPkg) @@ -204,7 +205,18 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ pkgKey := uniquePkgKey(&p) - if pkgKey == parentKey || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath || len(contents) == 1 { + // the name/version pair matches... + matchesParentPkg := pkgKey == parentKey + + // the virtual path matches... + matchesParentPkg = matchesParentPkg || parentPkg.Metadata.(pkg.JavaMetadata).VirtualPath == virtualPath + + // the pom artifactId has the parent name or vice versa + if propsObj.ArtifactID != "" { + matchesParentPkg = matchesParentPkg || strings.Contains(parentPkg.Name, propsObj.ArtifactID) || strings.Contains(propsObj.ArtifactID, parentPkg.Name) + } + + if matchesParentPkg { // we've run across more information about our parent package, add this info to the parent package metadata // the pom properties is typically a better source of information for name and version than the manifest if p.Name != parentPkg.Name { diff --git a/syft/cataloger/java/archive_parser_test.go b/syft/cataloger/java/archive_parser_test.go index 9da709902..0586ee5fe 100644 --- a/syft/cataloger/java/archive_parser_test.go +++ b/syft/cataloger/java/archive_parser_test.go @@ -78,52 +78,6 @@ func generateJavaBuildFixture(t *testing.T, fixturePath string) { } } -func TestSelectName(t *testing.T) { - tests := []struct { - desc string - manifest pkg.JavaManifest - archive archiveFilename - expected string - }{ - { - desc: "name from Implementation-Title", - archive: archiveFilename{}, - manifest: pkg.JavaManifest{ - Main: map[string]string{ - "Implementation-Title": "maven-wrapper", - }, - }, - expected: "maven-wrapper", - }, - { - desc: "Implementation-Title does not override", - manifest: pkg.JavaManifest{ - Main: map[string]string{ - "Name": "foo", - "Implementation-Title": "maven-wrapper", - }, - }, - archive: archiveFilename{ - fields: []map[string]string{ - {"name": "omg"}, - }, - }, - expected: "omg", - }, - } - - for _, test := range tests { - t.Run(test.desc, func(t *testing.T) { - result := selectName(&test.manifest, test.archive) - - if result != test.expected { - t.Errorf("mismatch in names: '%s' != '%s'", result, test.expected) - } - }) - } - -} - func TestParseJar(t *testing.T) { tests := []struct { fixture string @@ -349,7 +303,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "jul-to-slf4j", @@ -361,7 +315,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-validation", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "hibernate-validator", @@ -373,7 +327,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-expression", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "jakarta.validation-api", @@ -381,11 +335,11 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-web", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "spring-boot-starter-actuator", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "log4j-api", @@ -405,23 +359,23 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-aop", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "spring-boot-actuator-autoconfigure", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "spring-jcl", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "spring-boot", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "spring-boot-starter-logging", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "jakarta.annotation-api", @@ -429,7 +383,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-webmvc", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "HdrHistogram", @@ -437,7 +391,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-web", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "logback-classic", @@ -449,7 +403,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-json", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "jackson-databind", @@ -465,7 +419,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-autoconfigure", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "jackson-datatype-jdk8", @@ -481,11 +435,11 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-beans", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "spring-boot-actuator", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "slf4j-api", @@ -493,7 +447,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-core", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, { Name: "logback-core", @@ -513,7 +467,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-boot-starter-tomcat", - Version: "2.2.2.RELEASE", + Version: "2.2.2", }, { Name: "classmate", @@ -521,7 +475,7 @@ func TestParseNestedJar(t *testing.T) { }, { Name: "spring-context", - Version: "5.2.2.RELEASE", + Version: "5.2.2", }, }, }, @@ -542,7 +496,7 @@ func TestParseNestedJar(t *testing.T) { t.Fatalf("failed to parse java archive: %+v", err) } - nameVersionPairSet := internal.NewStringSet() + expectedNameVersionPairSet := internal.NewStringSet() makeKey := func(p *pkg.Package) string { if p == nil { @@ -552,20 +506,32 @@ func TestParseNestedJar(t *testing.T) { } for _, e := range test.expected { - nameVersionPairSet.Add(makeKey(&e)) + expectedNameVersionPairSet.Add(makeKey(&e)) } - if len(actual) != len(nameVersionPairSet) { + if len(actual) != len(expectedNameVersionPairSet) { + actualNameVersionPairSet := internal.NewStringSet() for _, a := range actual { - t.Log(" ", a) + key := makeKey(&a) + actualNameVersionPairSet.Add(key) + if !expectedNameVersionPairSet.Contains(key) { + t.Logf("extra package: %s", a) + } } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(nameVersionPairSet)) + + for _, key := range expectedNameVersionPairSet.ToSlice() { + if !actualNameVersionPairSet.Contains(key) { + t.Logf("missing package: %s", key) + } + } + + t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expectedNameVersionPairSet)) } for _, a := range actual { actualKey := makeKey(&a) - if !nameVersionPairSet.Contains(actualKey) { + if !expectedNameVersionPairSet.Contains(actualKey) { t.Errorf("unexpected pkg: %q", actualKey) } @@ -578,7 +544,7 @@ func TestParseNestedJar(t *testing.T) { if metadata.Parent == nil { t.Errorf("unassigned error for pkg=%q", actualKey) } else if makeKey(metadata.Parent) != "spring-boot|0.0.1-SNAPSHOT" { - // NB: this is a hard-coded condition to simplify the test harness + // NB: this is a hard-coded condition to simplify the test harness to account for https://github.com/micrometer-metrics/micrometer/issues/1785 if a.Name == "pcollections" { if metadata.Parent.Name != "micrometer-core" { t.Errorf("nested 'pcollections' pkg has wrong parent: %q", metadata.Parent.Name) diff --git a/syft/cataloger/java/java_manifest_test.go b/syft/cataloger/java/java_manifest_test.go index 727346dc6..d80a864fa 100644 --- a/syft/cataloger/java/java_manifest_test.go +++ b/syft/cataloger/java/java_manifest_test.go @@ -49,7 +49,7 @@ func TestParseJavaManifest(t *testing.T) { "thing-1": { "Built-By": "?", }, - "2": { + "1": { "Build-Jdk": "14.0.1", "Main-Class": "hello.HelloWorld", }, @@ -105,3 +105,45 @@ func TestParseJavaManifest(t *testing.T) { }) } } + +func TestSelectName(t *testing.T) { + tests := []struct { + desc string + manifest pkg.JavaManifest + archive archiveFilename + expected string + }{ + { + desc: "Get name from Implementation-Title", + archive: archiveFilename{}, + manifest: pkg.JavaManifest{ + Main: map[string]string{ + "Implementation-Title": "maven-wrapper", + }, + }, + expected: "maven-wrapper", + }, + { + desc: "Implementation-Title does not override name from filename", + manifest: pkg.JavaManifest{ + Main: map[string]string{ + "Name": "foo", + "Implementation-Title": "maven-wrapper", + }, + }, + archive: newJavaArchiveFilename("/something/omg.jar"), + expected: "omg", + }, + } + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + result := selectName(&test.manifest, test.archive) + + if result != test.expected { + t.Errorf("mismatch in names: '%s' != '%s'", result, test.expected) + } + }) + } + +} From e2593cd6b72a4560aee8154f47b446fda6f9e4d3 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 30 Oct 2020 10:58:11 -0400 Subject: [PATCH 9/9] remove extra fields from the compare script metadata namedtuple Signed-off-by: Alex Goodman --- test/inline-compare/utils/inline.py | 9 ++++----- test/inline-compare/utils/package.py | 2 +- test/inline-compare/utils/syft.py | 7 ++----- test/inline-compare/utils/traverse.py | 21 --------------------- 4 files changed, 7 insertions(+), 32 deletions(-) delete mode 100644 test/inline-compare/utils/traverse.py diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py index 11637cf9a..781d6a72a 100644 --- a/test/inline-compare/utils/inline.py +++ b/test/inline-compare/utils/inline.py @@ -84,7 +84,6 @@ class InlineScan: metadata[pkg.type][pkg] = utils.package.Metadata( version=entry["maven-version"], - extra=tuple(), ) return packages, metadata @@ -98,7 +97,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) return packages, metadata @@ -113,7 +112,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) return packages, metadata @@ -126,7 +125,7 @@ class InlineScan: type=entry["type"].lower(), ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) return packages, metadata @@ -138,6 +137,6 @@ class InlineScan: name=entry["package"], type=entry["type"].lower() ) packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple()) + metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"]) return packages, metadata diff --git a/test/inline-compare/utils/package.py b/test/inline-compare/utils/package.py index b4ac2c796..13118a4b5 100644 --- a/test/inline-compare/utils/package.py +++ b/test/inline-compare/utils/package.py @@ -3,7 +3,7 @@ import collections import dataclasses from typing import Set, FrozenSet, Tuple, Any, List -Metadata = collections.namedtuple("Metadata", "version extra") +Metadata = collections.namedtuple("Metadata", "version") Package = collections.namedtuple("Package", "name type") Info = collections.namedtuple("Info", "packages metadata") diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py index 52b5c7899..51c7e0ba0 100644 --- a/test/inline-compare/utils/syft.py +++ b/test/inline-compare/utils/syft.py @@ -4,7 +4,6 @@ import collections import utils.package import utils.image -from utils.traverse import dig class Syft: @@ -29,8 +28,6 @@ class Syft: metadata = collections.defaultdict(dict) for entry in self._enumerate_section(section="artifacts"): - extra = {} - # normalize to inline pkg_type = entry["type"].lower() if pkg_type in ("wheel", "egg", "python"): @@ -52,7 +49,7 @@ class Syft: if "java" in pkg_type: # we need to use the virtual path instead of the name to account for nested dependencies with the same # package name (but potentially different metadata) - name = dig(entry, "metadata", "virtualPath") + name = entry.get("metadata", {}).get("virtualPath") elif pkg_type == "apkg": # inline scan strips off the release from the version, which should be normalized here @@ -66,6 +63,6 @@ class Syft: packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple()) + metadata[pkg.type][pkg] = utils.package.Metadata(version=version) return utils.package.Info(packages=frozenset(packages), metadata=metadata) diff --git a/test/inline-compare/utils/traverse.py b/test/inline-compare/utils/traverse.py deleted file mode 100644 index d98cf4ee7..000000000 --- a/test/inline-compare/utils/traverse.py +++ /dev/null @@ -1,21 +0,0 @@ - -def dig(target, *keys, **kwargs): - """ - Traverse a nested set of dictionaries, tuples, or lists similar to ruby's dig function. - """ - end_of_chain = target - for key in keys: - if isinstance(end_of_chain, dict) and key in end_of_chain: - end_of_chain = end_of_chain[key] - elif isinstance(end_of_chain, (list, tuple)) and isinstance(key, int): - end_of_chain = end_of_chain[key] - else: - if 'fail' in kwargs and kwargs['fail'] is True: - if isinstance(end_of_chain, dict): - raise KeyError - else: - raise IndexError - else: - return None - - return end_of_chain