diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py
index a2f6224a4..6d963b624 100755
--- a/test/inline-compare/compare.py
+++ b/test/inline-compare/compare.py
@@ -9,9 +9,13 @@ from utils.format import Colors, print_rows
 from utils.inline import InlineScan
 from utils.syft import Syft
 
-QUALITY_GATE_THRESHOLD = 0.95
+DEFAULT_QUALITY_GATE_THRESHOLD = 0.95
 INDENT = " "
-IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{})
+
+PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{})
+METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{
+    "anchore/test_images:java": 0.58,
+})
 
 # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%,
 # however additional functionality in grype is still being implemented, so this threshold may not be able to be met.
@@ -19,10 +23,14 @@ IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{
 # issues/enhancements are done we want to ensure that the lower threshold is bumped up to catch regression. The only way
 # to do this is to select an upper threshold for images with known threshold values, so we have a failure that
 # loudly indicates the lower threshold should be bumped.
-IMAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
+PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
+METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{
+    # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata
+    "anchore/test_images:java": 0.65,
+})
 
 
-def report(analysis):
+def report(image, analysis):
     if analysis.extra_packages:
         rows = []
         print(
@@ -48,7 +56,6 @@ def report(analysis):
         print()
 
     if analysis.missing_metadata:
-        rows = []
         print(
             Colors.bold + "Syft mismatched metadata:",
             Colors.reset,
@@ -62,7 +69,7 @@ def report(analysis):
 
             diffs = difflib.ndiff([repr(syft_metadata_item)], [repr(metadata)])
 
-            print(INDENT + "for: " + repr(pkg))
+            print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)")
             print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs)))
             print()
 
@@ -73,42 +80,42 @@ def report(analysis):
         )
         print()
 
-    if analysis.similar_missing_packages:
-        rows = []
-        print(
-            Colors.bold + "Probably pairings of missing/extra packages:",
-            Colors.reset,
-            "to aid in troubleshooting missed/extra packages",
-        )
-        for similar_packages in analysis.similar_missing_packages:
-            rows.append(
-                [
-                    INDENT,
-                    repr(similar_packages.pkg),
-                    "--->",
-                    repr(similar_packages.missed),
-                ]
-            )
-        print_rows(rows)
-        print()
+    # if analysis.similar_missing_packages:
+    #     rows = []
+    #     print(
+    #         Colors.bold + "Probably pairings of missing/extra packages:",
+    #         Colors.reset,
+    #         "to aid in troubleshooting missed/extra packages",
+    #     )
+    #     for similar_packages in analysis.similar_missing_packages:
+    #         rows.append(
+    #             [
+    #                 INDENT,
+    #                 repr(similar_packages.pkg),
+    #                 "--->",
+    #                 repr(similar_packages.missed),
+    #             ]
+    #         )
+    #     print_rows(rows)
+    #     print()
 
-    if analysis.unmatched_missing_packages and analysis.extra_packages:
-        rows = []
-        print(
-            Colors.bold + "Probably missed packages:",
-            Colors.reset,
-            "a probable pair was not found",
-        )
-        for p in analysis.unmatched_missing_packages:
-            rows.append([INDENT, repr(p)])
-        print_rows(rows)
-        print()
+    # if analysis.unmatched_missing_packages and analysis.extra_packages:
+    #     rows = []
+    #     print(
+    #         Colors.bold + "Probably missed packages:",
+    #         Colors.reset,
+    #         "a probable pair was not found",
+    #     )
+    #     for p in analysis.unmatched_missing_packages:
+    #         rows.append([INDENT, repr(p)])
+    #     print_rows(rows)
+    #     print()
 
-    print(Colors.bold + "Summary:", Colors.reset)
+    print(Colors.bold + "Summary:", Colors.reset, image)
     print(" Inline Packages : %d" % len(analysis.inline_data.packages))
     print(" Syft Packages : %d" % len(analysis.syft_data.packages))
     print(
-        " (extra) : %d (note: this is ignored in the analysis!)"
+        " (extra) : %d (note: this is ignored by the quality gate!)"
         % len(analysis.extra_packages)
     )
     print(" (missing) : %d" % len(analysis.missing_packages))
@@ -150,12 +157,37 @@ def report(analysis):
         )
     )
 
-    overall_score = (
-        analysis.percent_overlapping_packages + analysis.percent_overlapping_metadata
-    ) / 2.0
-    print(Colors.bold + " Overall Score: %2.1f %%" % overall_score, Colors.reset)
+def enforce_quality_gate(title, actual_value, lower_gate_value, upper_gate_value):
+    if actual_value < lower_gate_value:
+        print(
+            Colors.bold
+            + " %s Quality Gate:\t" % title
+            + Colors.FG.red
+            + "FAIL (is not >= %d %%)" % lower_gate_value,
+            Colors.reset,
+        )
+        return False
+    elif actual_value > upper_gate_value:
+        print(
+            Colors.bold
+            + " %s Quality Gate:\t" % title
+            + Colors.FG.orange
+            + "FAIL (lower threshold is artificially low and should be updated)",
+            Colors.reset,
+        )
+        return False
+
+    print(
+        Colors.bold
+        + " %s Quality Gate:\t" % title
+        + Colors.FG.green
+        + "Pass (>= %d %%)" % lower_gate_value,
+        Colors.reset,
+    )
+
+    return True
 
 
 def main(image):
     cwd = os.path.dirname(os.path.abspath(__file__))
 
@@ -170,41 +202,27 @@ def main(image):
     )
 
     # show some useful report data for debugging / warm fuzzies
-    report(analysis)
+    report(image, analysis)
 
     # enforce a quality gate based on the comparison of package values and metadata values
-    upper_gate_value = IMAGE_UPPER_THRESHOLD[image] * 100
-    lower_gate_value = IMAGE_QUALITY_GATE[image] * 100
-    if analysis.quality_gate_score < lower_gate_value:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.red
-            + "FAILED (is not >= %d %%)\n" % lower_gate_value,
-            Colors.reset,
-        )
-        return 1
-    elif analysis.quality_gate_score > upper_gate_value:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.orange
-            + "FAILED (lower threshold is artificially low and should be updated)\n",
-            Colors.reset,
-        )
-        return 1
-    else:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.green
-            + "pass (>= %d %%)\n" % lower_gate_value,
-            Colors.reset,
-        )
+    success = True
+    success &= enforce_quality_gate(
+        title="Package",
+        actual_value=analysis.percent_overlapping_packages,
+        lower_gate_value=PACKAGE_QUALITY_GATE[image] * 100,
+        upper_gate_value=PACKAGE_UPPER_THRESHOLD[image] * 100
+    )
+    success &= enforce_quality_gate(
+        title="Metadata",
+        actual_value=analysis.percent_overlapping_metadata,
+        lower_gate_value=METADATA_QUALITY_GATE[image] * 100,
+        upper_gate_value=METADATA_UPPER_THRESHOLD[image] * 100
+    )
+    if not success:
+        return 1
 
     return 0
 
-
 if __name__ == "__main__":
     if len(sys.argv) != 2:
         sys.exit("provide an image")
diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py
index 1e9d77276..9bc118500 100644
--- a/test/inline-compare/utils/inline.py
+++ b/test/inline-compare/utils/inline.py
@@ -1,4 +1,5 @@
 import os
+import re
 import json
 import collections
 
@@ -66,22 +67,27 @@ class InlineScan:
             elif pkg_type in ("java-jpi", "java-hpi"):
                 pkg_type = "java-?pi"
 
+            # this would usually be "package" but this would not be able to account for duplicate dependencies in
+            # nested jars of the same name. Fallback to the package name if there is no given location
+            name = entry["location"]
+
+            # replace fields with "N/A" with None
+            for k, v in dict(entry).items():
+                if v in ("", "N/A"):
+                    entry[k] = None
+
+            extra = {}
+            # extra = dict(entry)
+            # extra.pop('type')
+            # extra.pop('maven-version')
+            # extra.pop("origin") # don't include origin as syft does not compact several fields into 1
+
             pkg = utils.package.Package(
-                name=entry["package"],
+                name=name,
                 type=pkg_type,
             )
             packages.add(pkg)
 
-            extra = dict(entry)
-            extra.pop('type')
-            extra.pop('maven-version')
-            for k, v in dict(extra).items():
-                if v in ("", "N/A"):
-                    extra[k] = None
-
-            # temp temp temp
-            extra.pop("location")
-
             metadata[pkg.type][pkg] = utils.package.Metadata(
                 version=entry["maven-version"],
                 extra=tuple(sorted(extra.items())),
diff --git a/test/inline-compare/utils/package.py b/test/inline-compare/utils/package.py
index 06d36b800..b4ac2c796 100644
--- a/test/inline-compare/utils/package.py
+++ b/test/inline-compare/utils/package.py
@@ -144,13 +144,3 @@ class Analysis:
             float(len(self.overlapping_packages) + len(self.similar_missing_packages))
             / float(len(self.inline_data.packages))
         ) * 100.0
-
-    @property
-    def quality_gate_score(self):
-        """
-        The result of the analysis in the form of an aggregated percentage; it is up to the caller to use this value
-        and enforce a quality gate.
-        """
-        return (
-            self.percent_overlapping_packages + self.percent_overlapping_metadata
-        ) / 2.0
diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py
index 3bd1ac59c..a9b95c825 100644
--- a/test/inline-compare/utils/syft.py
+++ b/test/inline-compare/utils/syft.py
@@ -46,50 +46,41 @@ class Syft:
         elif pkg_type in ("apk",):
             pkg_type = "apkg"
 
+            name = entry["name"]
+            version = entry["version"]
+
+            if "java" in pkg_type:
+                # lets match what inline scan expects to output
+
+                # specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
+                # implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
+                virtualPath = dig(entry, "metadata", "virtualPath")
+
+                # package = dig(entry, "metadata", "pomProperties", "artifactId")
+                # if not package:
+                #     package = "%s-%s" % (dig(entry, "name"), dig(entry, "version"))
+                #
+                # extra = {
+                #     "implementation-version": implVersion,
+                #     "specification-version": specVersion,
+                #     "location": virtualPath,
+                #     "package": package,
+                # }
+
+                # we need to use the virtual path instead of the name to account for nested dependencies with the same
+                # package name (but potentially different metadata)
+                name = virtualPath
+
+            elif pkg_type == "apkg":
+                version = entry["version"].split("-")[0]
+
             pkg = utils.package.Package(
-                name=entry["name"],
+                name=name,
                 type=pkg_type,
             )
             packages.add(pkg)
 
-            if "java" in pkg_type:
-                # lets match what inline scan expects to output
-
-                path = dig(entry, "locations", 0, "path")
-                specVendor = dig(entry, "metadata", "manifest", "specificationVendor")
-                implVendor = dig(entry, "metadata", "manifest", "implementationVendor")
-
-                specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
-                implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
-
-                extra = {
-                    "implementation-version": implVersion,
-                    "specification-version": specVersion,
-                    "origin": dig(entry, "metadata", "pomProperties", "groupId"),
-                    "location": path,
-                    "package": dig(entry, "metadata", "pomProperties", "artifactId"),
-                }
-
-                if dig(entry, "metadata", "parentPackage"):
-                    extra['origin'] = dig(entry, "metadata", "pomProperties", "groupId")
-                else:
-                    # this is a nested package...
-                    if specVendor:
-                        extra['origin'] = specVendor
-                    elif implVendor:
-                        extra['origin'] = implVendor
-
-                pomPath = dig(entry, "metadata", "pomProperties", "Path")
-                if path and pomPath:
-                    extra["location"] = "%s:%s" % (path, pomPath),
-
-                # temp temp temp
-                extra.pop("location")
-
-            elif pkg_type == "apkg":
-                entry["version"] = entry["version"].split("-")[0]
-
-            metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple(sorted(extra.items())))
+            metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items())))
 
         return utils.package.Info(packages=frozenset(packages), metadata=metadata)
 