partial java comparison with extra metadata

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-29 12:40:49 -04:00
parent 1230650771
commit fc991bc62e
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
4 changed files with 136 additions and 131 deletions

View File

@ -9,9 +9,13 @@ from utils.format import Colors, print_rows
from utils.inline import InlineScan
from utils.syft import Syft
QUALITY_GATE_THRESHOLD = 0.95
DEFAULT_QUALITY_GATE_THRESHOLD = 0.95
INDENT = " "
IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{})
PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{})
METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{
"anchore/test_images:java": 0.58,
})
# We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%,
# however additional functionality in grype is still being implemented, so this threshold may not be able to be met.
@ -19,10 +23,14 @@ IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{
# issues/enhancements are done we want to ensure that the lower threshold is bumped up to catch regression. The only way
# to do this is to select an upper threshold for images with known threshold values, so we have a failure that
# loudly indicates the lower threshold should be bumped.
IMAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{
# syft is better at detecting package versions in specific cases, leading to a drop in matching metadata
"anchore/test_images:java": 0.65,
})
def report(analysis):
def report(image, analysis):
if analysis.extra_packages:
rows = []
print(
@ -48,7 +56,6 @@ def report(analysis):
print()
if analysis.missing_metadata:
rows = []
print(
Colors.bold + "Syft mismatched metadata:",
Colors.reset,
@ -62,7 +69,7 @@ def report(analysis):
diffs = difflib.ndiff([repr(syft_metadata_item)], [repr(metadata)])
print(INDENT + "for: " + repr(pkg))
print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)")
print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs)))
print()
@ -73,42 +80,42 @@ def report(analysis):
)
print()
if analysis.similar_missing_packages:
rows = []
print(
Colors.bold + "Probably pairings of missing/extra packages:",
Colors.reset,
"to aid in troubleshooting missed/extra packages",
)
for similar_packages in analysis.similar_missing_packages:
rows.append(
[
INDENT,
repr(similar_packages.pkg),
"--->",
repr(similar_packages.missed),
]
)
print_rows(rows)
print()
# if analysis.similar_missing_packages:
# rows = []
# print(
# Colors.bold + "Probably pairings of missing/extra packages:",
# Colors.reset,
# "to aid in troubleshooting missed/extra packages",
# )
# for similar_packages in analysis.similar_missing_packages:
# rows.append(
# [
# INDENT,
# repr(similar_packages.pkg),
# "--->",
# repr(similar_packages.missed),
# ]
# )
# print_rows(rows)
# print()
if analysis.unmatched_missing_packages and analysis.extra_packages:
rows = []
print(
Colors.bold + "Probably missed packages:",
Colors.reset,
"a probable pair was not found",
)
for p in analysis.unmatched_missing_packages:
rows.append([INDENT, repr(p)])
print_rows(rows)
print()
# if analysis.unmatched_missing_packages and analysis.extra_packages:
# rows = []
# print(
# Colors.bold + "Probably missed packages:",
# Colors.reset,
# "a probable pair was not found",
# )
# for p in analysis.unmatched_missing_packages:
# rows.append([INDENT, repr(p)])
# print_rows(rows)
# print()
print(Colors.bold + "Summary:", Colors.reset)
print(Colors.bold + "Summary:", Colors.reset, image)
print(" Inline Packages : %d" % len(analysis.inline_data.packages))
print(" Syft Packages : %d" % len(analysis.syft_data.packages))
print(
" (extra) : %d (note: this is ignored in the analysis!)"
" (extra) : %d (note: this is ignored by the quality gate!)"
% len(analysis.extra_packages)
)
print(" (missing) : %d" % len(analysis.missing_packages))
@ -150,12 +157,37 @@ def report(analysis):
)
)
overall_score = (
analysis.percent_overlapping_packages + analysis.percent_overlapping_metadata
) / 2.0
print(Colors.bold + " Overall Score: %2.1f %%" % overall_score, Colors.reset)
def enforce_quality_gate(title, actual_value, lower_gate_value, upper_gate_value):
    """
    Print the quality gate verdict for a single score and report whether the gate passed.

    The gate fails when the score is below the lower threshold, and also when it is above the upper threshold
    (which signals that the lower threshold is artificially low and should be updated).

    :param title: label printed in front of "Quality Gate:"
    :param actual_value: the measured score, compared directly against both thresholds
    :param lower_gate_value: minimum passing score (inclusive)
    :param upper_gate_value: maximum passing score (inclusive)
    :return: True if lower_gate_value <= actual_value <= upper_gate_value, otherwise False
    """
    prefix = Colors.bold + " %s Quality Gate:\t" % title

    # Thresholds are built by callers as `fraction * 100` and are therefore floats. "%d" truncates, so a value
    # such as 0.58 * 100 == 57.99999999999999 would be displayed as "57"; "%g" rounds to 6 significant digits
    # and drops trailing zeros, so it is displayed as "58" (and 95.0 as "95").
    if actual_value < lower_gate_value:
        print(
            prefix + Colors.FG.red + "FAIL (is not >= %g %%)" % lower_gate_value,
            Colors.reset,
        )
        return False

    if actual_value > upper_gate_value:
        print(
            prefix
            + Colors.FG.orange
            + "FAIL (lower threshold is artificially low and should be updated)",
            Colors.reset,
        )
        return False

    print(
        prefix + Colors.FG.green + "Pass (>= %g %%)" % lower_gate_value,
        Colors.reset,
    )
    return True
def main(image):
cwd = os.path.dirname(os.path.abspath(__file__))
@ -170,41 +202,27 @@ def main(image):
)
# show some useful report data for debugging / warm fuzzies
report(analysis)
report(image, analysis)
# enforce a quality gate based on the comparison of package values and metadata values
upper_gate_value = IMAGE_UPPER_THRESHOLD[image] * 100
lower_gate_value = IMAGE_QUALITY_GATE[image] * 100
if analysis.quality_gate_score < lower_gate_value:
print(
Colors.bold
+ " Quality Gate: "
+ Colors.FG.red
+ "FAILED (is not >= %d %%)\n" % lower_gate_value,
Colors.reset,
)
return 1
elif analysis.quality_gate_score > upper_gate_value:
print(
Colors.bold
+ " Quality Gate: "
+ Colors.FG.orange
+ "FAILED (lower threshold is artificially low and should be updated)\n",
Colors.reset,
)
return 1
else:
print(
Colors.bold
+ " Quality Gate: "
+ Colors.FG.green
+ "pass (>= %d %%)\n" % lower_gate_value,
Colors.reset,
)
success = True
success &= enforce_quality_gate(
title="Package",
actual_value=analysis.percent_overlapping_packages,
lower_gate_value=PACKAGE_QUALITY_GATE[image] * 100,
upper_gate_value=PACKAGE_UPPER_THRESHOLD[image] * 100
)
success &= enforce_quality_gate(
title="Metadata",
actual_value=analysis.percent_overlapping_metadata,
lower_gate_value=METADATA_QUALITY_GATE[image] * 100,
upper_gate_value=METADATA_UPPER_THRESHOLD[image] * 100
)
if not success:
return 1
return 0
if __name__ == "__main__":
if len(sys.argv) != 2:
sys.exit("provide an image")

View File

@ -1,4 +1,5 @@
import os
import re
import json
import collections
@ -66,22 +67,27 @@ class InlineScan:
elif pkg_type in ("java-jpi", "java-hpi"):
pkg_type = "java-?pi"
# this would usually be "package" but this would not be able to account for duplicate dependencies in
# nested jars of the same name. Fallback to the package name if there is no given location
name = entry["location"]
# replace fields with "N/A" with None
for k, v in dict(entry).items():
if v in ("", "N/A"):
entry[k] = None
extra = {}
# extra = dict(entry)
# extra.pop('type')
# extra.pop('maven-version')
# extra.pop("origin") # don't include origin as syft does not compact several fields into 1
pkg = utils.package.Package(
name=entry["package"],
name=name,
type=pkg_type,
)
packages.add(pkg)
extra = dict(entry)
extra.pop('type')
extra.pop('maven-version')
for k, v in dict(extra).items():
if v in ("", "N/A"):
extra[k] = None
# temp temp temp
extra.pop("location")
metadata[pkg.type][pkg] = utils.package.Metadata(
version=entry["maven-version"],
extra=tuple(sorted(extra.items())),

View File

@ -144,13 +144,3 @@ class Analysis:
float(len(self.overlapping_packages) + len(self.similar_missing_packages))
/ float(len(self.inline_data.packages))
) * 100.0
@property
def quality_gate_score(self):
"""
The result of the analysis in the form of an aggregated percentage: the arithmetic mean of
percent_overlapping_packages and percent_overlapping_metadata. This property only computes the value; it is
up to the caller to use this value and enforce a quality gate.
"""
return (
self.percent_overlapping_packages + self.percent_overlapping_metadata
) / 2.0

View File

@ -46,50 +46,41 @@ class Syft:
elif pkg_type in ("apk",):
pkg_type = "apkg"
name = entry["name"]
version = entry["version"]
if "java" in pkg_type:
# let's match what inline scan expects to output
# specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
# implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
virtualPath = dig(entry, "metadata", "virtualPath")
# package = dig(entry, "metadata", "pomProperties", "artifactId")
# if not package:
# package = "%s-%s" % (dig(entry, "name"), dig(entry, "version"))
#
# extra = {
# "implementation-version": implVersion,
# "specification-version": specVersion,
# "location": virtualPath,
# "package": package,
# }
# we need to use the virtual path instead of the name to account for nested dependencies with the same
# package name (but potentially different metadata)
name = virtualPath
elif pkg_type == "apkg":
version = entry["version"].split("-")[0]
pkg = utils.package.Package(
name=entry["name"],
name=name,
type=pkg_type,
)
packages.add(pkg)
if "java" in pkg_type:
# let's match what inline scan expects to output
path = dig(entry, "locations", 0, "path")
specVendor = dig(entry, "metadata", "manifest", "specificationVendor")
implVendor = dig(entry, "metadata", "manifest", "implementationVendor")
specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
extra = {
"implementation-version": implVersion,
"specification-version": specVersion,
"origin": dig(entry, "metadata", "pomProperties", "groupId"),
"location": path,
"package": dig(entry, "metadata", "pomProperties", "artifactId"),
}
if dig(entry, "metadata", "parentPackage"):
extra['origin'] = dig(entry, "metadata", "pomProperties", "groupId")
else:
# this is a nested package...
if specVendor:
extra['origin'] = specVendor
elif implVendor:
extra['origin'] = implVendor
pomPath = dig(entry, "metadata", "pomProperties", "Path")
if path and pomPath:
extra["location"] = "%s:%s" % (path, pomPath),
# temp temp temp
extra.pop("location")
elif pkg_type == "apkg":
entry["version"] = entry["version"].split("-")[0]
metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple(sorted(extra.items())))
metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items())))
return utils.package.Info(packages=frozenset(packages), metadata=metadata)