From a4f22e65fc670f3d921c8a8c037d6334cc0f6f95 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 29 Oct 2020 13:52:51 -0400 Subject: [PATCH] expand compare testing images Signed-off-by: Alex Goodman --- test/inline-compare/compare-all.sh | 3 +- test/inline-compare/compare.py | 73 +++++++++++++++-------------- test/inline-compare/utils/inline.py | 8 +--- test/inline-compare/utils/syft.py | 25 ++-------- 4 files changed, 45 insertions(+), 64 deletions(-) diff --git a/test/inline-compare/compare-all.sh b/test/inline-compare/compare-all.sh index 85a8e9e20..146afe9b2 100755 --- a/test/inline-compare/compare-all.sh +++ b/test/inline-compare/compare-all.sh @@ -1,8 +1,7 @@ #!/usr/bin/env bash set -eu -# TODO: Add "alpine:3.12.0" back in when we've figured out how to handle the apk version field w/ and w/o release information (see issue: https://github.com/anchore/syft/pull/195) -images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1") +images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1" "alpine:3.12.0" "anchore/test_images:java" "anchore/test_images:py38" "anchore/anchore-engine:v0.8.2" "jenkins/jenkins:2.249.2-lts-jdk11" ) # gather all image analyses for img in "${images[@]}"; do diff --git a/test/inline-compare/compare.py b/test/inline-compare/compare.py index 6d963b624..819f80756 100755 --- a/test/inline-compare/compare.py +++ b/test/inline-compare/compare.py @@ -14,7 +14,9 @@ INDENT = " " PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{}) METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{ - "anchore/test_images:java": 0.58, + # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata + "anchore/test_images:java": 0.52, + "jenkins/jenkins:2.249.2-lts-jdk11": 0.82, }) # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%, @@ -26,7 +28,8 @@ METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THR PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{ # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata - "anchore/test_images:java": 0.65, + "anchore/test_images:java": 0.54, + "jenkins/jenkins:2.249.2-lts-jdk11": 0.84, }) @@ -71,45 +74,45 @@ def report(image, analysis): print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)") print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs))) - print() - else: + if not analysis.missing_metadata: print( INDENT, - "There are mismatches, but only due to packages Syft did not find (but inline did).", + "There are mismatches, but only due to packages Syft did not find (but inline did).\n", ) + + if analysis.similar_missing_packages: + rows = [] + print( + Colors.bold + "Probably pairings of missing/extra packages:", + Colors.reset, + "to aid in troubleshooting missed/extra packages", + ) + for similar_packages in analysis.similar_missing_packages: + rows.append( + [ + INDENT, + repr(similar_packages.pkg), + "--->", + repr(similar_packages.missed), + ] + ) + print_rows(rows) print() - # if analysis.similar_missing_packages: - # rows = [] - # print( - # Colors.bold + "Probably pairings of missing/extra packages:", - # Colors.reset, - # "to aid in troubleshooting missed/extra packages", - # ) - # for similar_packages in analysis.similar_missing_packages: - # rows.append( - # [ - # INDENT, - # repr(similar_packages.pkg), - # "--->", - # repr(similar_packages.missed), - # ] - # ) - # print_rows(rows) - # print() + show_probable_mismatches = analysis.unmatched_missing_packages and analysis.extra_packages and len(analysis.unmatched_missing_packages) != len(analysis.missing_packages) - # if analysis.unmatched_missing_packages and analysis.extra_packages: - # rows = [] - # print( - # Colors.bold + "Probably missed packages:", - # Colors.reset, - # "a probable pair was not found", - # ) - # for p in analysis.unmatched_missing_packages: - # rows.append([INDENT, repr(p)]) - # print_rows(rows) - # print() + if show_probable_mismatches: + rows = [] + print( + Colors.bold + "Probably missed packages:", + Colors.reset, + "a probable pair was not found", + ) + for p in analysis.unmatched_missing_packages: + rows.append([INDENT, repr(p)]) + print_rows(rows) + print() print(Colors.bold + "Summary:", Colors.reset, image) print(" Inline Packages : %d" % len(analysis.inline_data.packages)) @@ -121,7 +124,7 @@ def report(image, analysis): print(" (missing) : %d" % len(analysis.missing_packages)) print() - if analysis.unmatched_missing_packages and analysis.extra_packages: + if show_probable_mismatches: print( " Probable Package Matches : %d (matches not made, but were probably found by both Inline and Syft)" % len(analysis.similar_missing_packages) diff --git a/test/inline-compare/utils/inline.py b/test/inline-compare/utils/inline.py index 9bc118500..11637cf9a 100644 --- a/test/inline-compare/utils/inline.py +++ b/test/inline-compare/utils/inline.py @@ -76,12 +76,6 @@ class InlineScan: if v in ("", "N/A"): entry[k] = None - extra = {} - # extra = dict(entry) - # extra.pop('type') - # extra.pop('maven-version') - # extra.pop("origin") # don't include origin as syft does not compact several fields into 1 - pkg = utils.package.Package( name=name, type=pkg_type, @@ -90,7 +84,7 @@ class InlineScan: metadata[pkg.type][pkg] = utils.package.Metadata( version=entry["maven-version"], - extra=tuple(sorted(extra.items())), + extra=tuple(), ) return packages, metadata diff --git a/test/inline-compare/utils/syft.py b/test/inline-compare/utils/syft.py index a9b95c825..52b5c7899 100644 --- a/test/inline-compare/utils/syft.py +++ b/test/inline-compare/utils/syft.py @@ -50,29 +50,14 @@ class Syft: version = entry["version"] if "java" in pkg_type: - # lets match what inline scan expects to output - - # specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None - # implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None - virtualPath = dig(entry, "metadata", "virtualPath") - - # package = dig(entry, "metadata", "pomProperties", "artifactId") - # if not package: - # package = "%s-%s" % (dig(entry, "name"), dig(entry, "version")) - # - # extra = { - # "implementation-version": implVersion, - # "specification-version": specVersion, - # "location": virtualPath, - # "package": package, - # } - # we need to use the virtual path instead of the name to account for nested dependencies with the same # package name (but potentially different metadata) - name = virtualPath + name = dig(entry, "metadata", "virtualPath") elif pkg_type == "apkg": - version = entry["version"].split("-")[0] + # inline scan strips off the release from the version, which should be normalized here + fields = entry["version"].split("-") + version = "-".join(fields[:-1]) pkg = utils.package.Package( name=name, @@ -81,6 +66,6 @@ class Syft: packages.add(pkg) - metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items()))) + metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple()) return utils.package.Info(packages=frozenset(packages), metadata=metadata)