expand compare testing images

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-29 13:52:51 -04:00
parent ab45be98b8
commit a4f22e65fc
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
4 changed files with 45 additions and 64 deletions

View File

@ -1,8 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
set -eu set -eu
# TODO: Add "alpine:3.12.0" back in when we've figured out how to handle the apk version field w/ and w/o release information (see issue: https://github.com/anchore/syft/pull/195) images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1" "alpine:3.12.0" "anchore/test_images:java" "anchore/test_images:py38" "anchore/anchore-engine:v0.8.2" "jenkins/jenkins:2.249.2-lts-jdk11" )
images=("debian:10.5" "centos:8.2.2004" "rails:5.0.1")
# gather all image analyses # gather all image analyses
for img in "${images[@]}"; do for img in "${images[@]}"; do

View File

@ -14,7 +14,9 @@ INDENT = " "
PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{}) PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{})
METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{ METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{
"anchore/test_images:java": 0.58, # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata
"anchore/test_images:java": 0.52,
"jenkins/jenkins:2.249.2-lts-jdk11": 0.82,
}) })
# We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%, # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%,
@ -26,7 +28,8 @@ METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THR
PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{}) PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{ METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{
# syft is better at detecting package versions in specific cases, leading to a drop in matching metadata # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata
"anchore/test_images:java": 0.65, "anchore/test_images:java": 0.54,
"jenkins/jenkins:2.249.2-lts-jdk11": 0.84,
}) })
@ -71,45 +74,45 @@ def report(image, analysis):
print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)") print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)")
print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs))) print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs)))
print()
else: if not analysis.missing_metadata:
print( print(
INDENT, INDENT,
"There are mismatches, but only due to packages Syft did not find (but inline did).", "There are mismatches, but only due to packages Syft did not find (but inline did).\n",
) )
if analysis.similar_missing_packages:
rows = []
print(
Colors.bold + "Probably pairings of missing/extra packages:",
Colors.reset,
"to aid in troubleshooting missed/extra packages",
)
for similar_packages in analysis.similar_missing_packages:
rows.append(
[
INDENT,
repr(similar_packages.pkg),
"--->",
repr(similar_packages.missed),
]
)
print_rows(rows)
print() print()
# if analysis.similar_missing_packages: show_probable_mismatches = analysis.unmatched_missing_packages and analysis.extra_packages and len(analysis.unmatched_missing_packages) != len(analysis.missing_packages)
# rows = []
# print(
# Colors.bold + "Probably pairings of missing/extra packages:",
# Colors.reset,
# "to aid in troubleshooting missed/extra packages",
# )
# for similar_packages in analysis.similar_missing_packages:
# rows.append(
# [
# INDENT,
# repr(similar_packages.pkg),
# "--->",
# repr(similar_packages.missed),
# ]
# )
# print_rows(rows)
# print()
# if analysis.unmatched_missing_packages and analysis.extra_packages: if show_probable_mismatches:
# rows = [] rows = []
# print( print(
# Colors.bold + "Probably missed packages:", Colors.bold + "Probably missed packages:",
# Colors.reset, Colors.reset,
# "a probable pair was not found", "a probable pair was not found",
# ) )
# for p in analysis.unmatched_missing_packages: for p in analysis.unmatched_missing_packages:
# rows.append([INDENT, repr(p)]) rows.append([INDENT, repr(p)])
# print_rows(rows) print_rows(rows)
# print() print()
print(Colors.bold + "Summary:", Colors.reset, image) print(Colors.bold + "Summary:", Colors.reset, image)
print(" Inline Packages : %d" % len(analysis.inline_data.packages)) print(" Inline Packages : %d" % len(analysis.inline_data.packages))
@ -121,7 +124,7 @@ def report(image, analysis):
print(" (missing) : %d" % len(analysis.missing_packages)) print(" (missing) : %d" % len(analysis.missing_packages))
print() print()
if analysis.unmatched_missing_packages and analysis.extra_packages: if show_probable_mismatches:
print( print(
" Probable Package Matches : %d (matches not made, but were probably found by both Inline and Syft)" " Probable Package Matches : %d (matches not made, but were probably found by both Inline and Syft)"
% len(analysis.similar_missing_packages) % len(analysis.similar_missing_packages)

View File

@ -76,12 +76,6 @@ class InlineScan:
if v in ("", "N/A"): if v in ("", "N/A"):
entry[k] = None entry[k] = None
extra = {}
# extra = dict(entry)
# extra.pop('type')
# extra.pop('maven-version')
# extra.pop("origin") # don't include origin as syft does not compact several fields into 1
pkg = utils.package.Package( pkg = utils.package.Package(
name=name, name=name,
type=pkg_type, type=pkg_type,
@ -90,7 +84,7 @@ class InlineScan:
metadata[pkg.type][pkg] = utils.package.Metadata( metadata[pkg.type][pkg] = utils.package.Metadata(
version=entry["maven-version"], version=entry["maven-version"],
extra=tuple(sorted(extra.items())), extra=tuple(),
) )
return packages, metadata return packages, metadata

View File

@ -50,29 +50,14 @@ class Syft:
version = entry["version"] version = entry["version"]
if "java" in pkg_type: if "java" in pkg_type:
# lets match what inline scan expects to output
# specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
# implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
virtualPath = dig(entry, "metadata", "virtualPath")
# package = dig(entry, "metadata", "pomProperties", "artifactId")
# if not package:
# package = "%s-%s" % (dig(entry, "name"), dig(entry, "version"))
#
# extra = {
# "implementation-version": implVersion,
# "specification-version": specVersion,
# "location": virtualPath,
# "package": package,
# }
# we need to use the virtual path instead of the name to account for nested dependencies with the same # we need to use the virtual path instead of the name to account for nested dependencies with the same
# package name (but potentially different metadata) # package name (but potentially different metadata)
name = virtualPath name = dig(entry, "metadata", "virtualPath")
elif pkg_type == "apkg": elif pkg_type == "apkg":
version = entry["version"].split("-")[0] # inline scan strips off the release from the version, which should be normalized here
fields = entry["version"].split("-")
version = "-".join(fields[:-1])
pkg = utils.package.Package( pkg = utils.package.Package(
name=name, name=name,
@ -81,6 +66,6 @@ class Syft:
packages.add(pkg) packages.add(pkg)
metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items()))) metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple())
return utils.package.Info(packages=frozenset(packages), metadata=metadata) return utils.package.Info(packages=frozenset(packages), metadata=metadata)