partial java comparison with extra metadata

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
Alex Goodman 2020-10-29 12:40:49 -04:00
parent 1230650771
commit fc991bc62e
4 changed files with 136 additions and 131 deletions

View File

@@ -9,9 +9,13 @@ from utils.format import Colors, print_rows
 from utils.inline import InlineScan
 from utils.syft import Syft
 
-QUALITY_GATE_THRESHOLD = 0.95
+DEFAULT_QUALITY_GATE_THRESHOLD = 0.95
 INDENT = " "
-IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{})
+PACKAGE_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{})
+METADATA_QUALITY_GATE = collections.defaultdict(lambda: DEFAULT_QUALITY_GATE_THRESHOLD, **{
+    "anchore/test_images:java": 0.58,
+})
 
 # We additionally fail if an image is above a particular threshold. Why? We expect the lower threshold to be 90%,
 # however additional functionality in grype is still being implemented, so this threshold may not be able to be met.
@@ -19,10 +23,14 @@ IMAGE_QUALITY_GATE = collections.defaultdict(lambda: QUALITY_GATE_THRESHOLD, **{
 # issues/enhancements are done we want to ensure that the lower threshold is bumped up to catch regression. The only way
 # to do this is to select an upper threshold for images with known threshold values, so we have a failure that
 # loudly indicates the lower threshold should be bumped.
-IMAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
+PACKAGE_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{})
+METADATA_UPPER_THRESHOLD = collections.defaultdict(lambda: 1, **{
+    # syft is better at detecting package versions in specific cases, leading to a drop in matching metadata
+    "anchore/test_images:java": 0.65,
+})
 
-def report(analysis):
+def report(image, analysis):
     if analysis.extra_packages:
         rows = []
         print(
@@ -48,7 +56,6 @@ def report(analysis):
         print()
 
     if analysis.missing_metadata:
-        rows = []
         print(
             Colors.bold + "Syft mismatched metadata:",
             Colors.reset,
@@ -62,7 +69,7 @@ def report(analysis):
             diffs = difflib.ndiff([repr(syft_metadata_item)], [repr(metadata)])
-            print(INDENT + "for: " + repr(pkg))
+            print(INDENT + "for: " + repr(pkg), "(top is syft, bottom is inline)")
             print(INDENT+INDENT+("\n"+INDENT+INDENT).join(list(diffs)))
         print()
@@ -73,42 +80,42 @@ def report(analysis):
         )
         print()
 
-    if analysis.similar_missing_packages:
-        rows = []
-        print(
-            Colors.bold + "Probably pairings of missing/extra packages:",
-            Colors.reset,
-            "to aid in troubleshooting missed/extra packages",
-        )
-        for similar_packages in analysis.similar_missing_packages:
-            rows.append(
-                [
-                    INDENT,
-                    repr(similar_packages.pkg),
-                    "--->",
-                    repr(similar_packages.missed),
-                ]
-            )
-        print_rows(rows)
-        print()
+    # if analysis.similar_missing_packages:
+    #     rows = []
+    #     print(
+    #         Colors.bold + "Probably pairings of missing/extra packages:",
+    #         Colors.reset,
+    #         "to aid in troubleshooting missed/extra packages",
+    #     )
+    #     for similar_packages in analysis.similar_missing_packages:
+    #         rows.append(
+    #             [
+    #                 INDENT,
+    #                 repr(similar_packages.pkg),
+    #                 "--->",
+    #                 repr(similar_packages.missed),
+    #             ]
+    #         )
+    #     print_rows(rows)
+    #     print()
 
-    if analysis.unmatched_missing_packages and analysis.extra_packages:
-        rows = []
-        print(
-            Colors.bold + "Probably missed packages:",
-            Colors.reset,
-            "a probable pair was not found",
-        )
-        for p in analysis.unmatched_missing_packages:
-            rows.append([INDENT, repr(p)])
-        print_rows(rows)
-        print()
+    # if analysis.unmatched_missing_packages and analysis.extra_packages:
+    #     rows = []
+    #     print(
+    #         Colors.bold + "Probably missed packages:",
+    #         Colors.reset,
+    #         "a probable pair was not found",
+    #     )
+    #     for p in analysis.unmatched_missing_packages:
+    #         rows.append([INDENT, repr(p)])
+    #     print_rows(rows)
+    #     print()
 
-    print(Colors.bold + "Summary:", Colors.reset)
+    print(Colors.bold + "Summary:", Colors.reset, image)
     print(" Inline Packages : %d" % len(analysis.inline_data.packages))
     print(" Syft Packages : %d" % len(analysis.syft_data.packages))
     print(
-        " (extra) : %d (note: this is ignored in the analysis!)"
+        " (extra) : %d (note: this is ignored by the quality gate!)"
         % len(analysis.extra_packages)
     )
     print(" (missing) : %d" % len(analysis.missing_packages))
@@ -150,12 +157,37 @@ def report(analysis):
         )
     )
 
-    overall_score = (
-        analysis.percent_overlapping_packages + analysis.percent_overlapping_metadata
-    ) / 2.0
-
-    print(Colors.bold + " Overall Score: %2.1f %%" % overall_score, Colors.reset)
+
+def enforce_quality_gate(title, actual_value, lower_gate_value, upper_gate_value):
+    if actual_value < lower_gate_value:
+        print(
+            Colors.bold
+            + " %s Quality Gate:\t" % title
+            + Colors.FG.red
+            + "FAIL (is not >= %d %%)" % lower_gate_value,
+            Colors.reset,
+        )
+        return False
+    elif actual_value > upper_gate_value:
+        print(
+            Colors.bold
+            + " %s Quality Gate:\t" % title
+            + Colors.FG.orange
+            + "FAIL (lower threshold is artificially low and should be updated)",
+            Colors.reset,
+        )
+        return False
+
+    print(
+        Colors.bold
+        + " %s Quality Gate:\t" % title
+        + Colors.FG.green
+        + "Pass (>= %d %%)" % lower_gate_value,
+        Colors.reset,
+    )
+    return True
 
 
 def main(image):
     cwd = os.path.dirname(os.path.abspath(__file__))
@@ -170,41 +202,27 @@ def main(image):
     )
 
     # show some useful report data for debugging / warm fuzzies
-    report(analysis)
+    report(image, analysis)
 
     # enforce a quality gate based on the comparison of package values and metadata values
-    upper_gate_value = IMAGE_UPPER_THRESHOLD[image] * 100
-    lower_gate_value = IMAGE_QUALITY_GATE[image] * 100
-    if analysis.quality_gate_score < lower_gate_value:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.red
-            + "FAILED (is not >= %d %%)\n" % lower_gate_value,
-            Colors.reset,
-        )
-        return 1
-    elif analysis.quality_gate_score > upper_gate_value:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.orange
-            + "FAILED (lower threshold is artificially low and should be updated)\n",
-            Colors.reset,
-        )
-        return 1
-    else:
-        print(
-            Colors.bold
-            + " Quality Gate: "
-            + Colors.FG.green
-            + "pass (>= %d %%)\n" % lower_gate_value,
-            Colors.reset,
-        )
+    success = True
+    success &= enforce_quality_gate(
+        title="Package",
+        actual_value=analysis.percent_overlapping_packages,
+        lower_gate_value=PACKAGE_QUALITY_GATE[image] * 100,
+        upper_gate_value=PACKAGE_UPPER_THRESHOLD[image] * 100
+    )
+    success &= enforce_quality_gate(
+        title="Metadata",
+        actual_value=analysis.percent_overlapping_metadata,
+        lower_gate_value=METADATA_QUALITY_GATE[image] * 100,
+        upper_gate_value=METADATA_UPPER_THRESHOLD[image] * 100
+    )
+
+    if not success:
+        return 1
 
     return 0
 
 
 if __name__ == "__main__":
     if len(sys.argv) != 2:
         sys.exit("provide an image")

View File

@@ -1,4 +1,5 @@
 import os
+import re
 import json
 import collections
@@ -66,22 +67,27 @@ class InlineScan:
             elif pkg_type in ("java-jpi", "java-hpi"):
                 pkg_type = "java-?pi"
 
+            # this would usually be "package" but this would not be able to account for duplicate dependencies in
+            # nested jars of the same name. Fallback to the package name if there is no given location
+            name = entry["location"]
+
+            # replace fields with "N/A" with None
+            for k, v in dict(entry).items():
+                if v in ("", "N/A"):
+                    entry[k] = None
+
+            extra = {}
+            # extra = dict(entry)
+            # extra.pop('type')
+            # extra.pop('maven-version')
+            # extra.pop("origin")  # don't include origin as syft does not compact several fields into 1
+
             pkg = utils.package.Package(
-                name=entry["package"],
+                name=name,
                 type=pkg_type,
             )
             packages.add(pkg)
 
-            extra = dict(entry)
-            extra.pop('type')
-            extra.pop('maven-version')
-
-            for k, v in dict(extra).items():
-                if v in ("", "N/A"):
-                    extra[k] = None
-
-            # temp temp temp
-            extra.pop("location")
-
             metadata[pkg.type][pkg] = utils.package.Metadata(
                 version=entry["maven-version"],
                 extra=tuple(sorted(extra.items())),
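
The loop added above normalizes empty and "N/A" fields on the inline-scan entry itself before the entry is used, and the package key switches from the package name to its location so duplicate dependencies in nested jars stay distinct. A small standalone sketch of that normalization; the entry dict here is a made-up example, not real inline-scan output:

# hypothetical inline-scan java entry; field names mirror the diff above
entry = {
    "package": "log4j-core",
    "location": "/app.jar:log4j-core",
    "maven-version": "2.13.3",
    "origin": "N/A",
    "specification-version": "",
}

# replace fields with "" or "N/A" with None (same loop as in the diff)
for k, v in dict(entry).items():
    if v in ("", "N/A"):
        entry[k] = None

assert entry["origin"] is None and entry["specification-version"] is None

# the package key now uses the location rather than the package name, so two
# copies of the same jar nested at different paths stay distinct
name = entry["location"]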

View File

@@ -144,13 +144,3 @@ class Analysis:
             float(len(self.overlapping_packages) + len(self.similar_missing_packages))
             / float(len(self.inline_data.packages))
         ) * 100.0
-
-    @property
-    def quality_gate_score(self):
-        """
-        The result of the analysis in the form of an aggregated percentage; it is up to the caller to use this value
-        and enforce a quality gate.
-        """
-        return (
-            self.percent_overlapping_packages + self.percent_overlapping_metadata
-        ) / 2.0
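
Dropping quality_gate_score means a strong package comparison can no longer average away a weak metadata comparison; each percentage is now gated on its own. A quick illustration with made-up percentages (the 95% gate is the default from the comparison script above):

# made-up percentages for illustration only
percent_overlapping_packages = 98.0
percent_overlapping_metadata = 92.0

# old behavior: the averaged score just clears a 95% gate, hiding the weaker
# metadata result behind the stronger package result
old_score = (percent_overlapping_packages + percent_overlapping_metadata) / 2.0
assert old_score >= 95.0

# new behavior: each percentage is gated separately, so the metadata
# comparison fails its gate on its own
assert percent_overlapping_packages >= 95.0
assert not percent_overlapping_metadata >= 95.0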

View File

@@ -46,50 +46,41 @@ class Syft:
             elif pkg_type in ("apk",):
                 pkg_type = "apkg"
 
+            name = entry["name"]
+            version = entry["version"]
+            if "java" in pkg_type:
+                # lets match what inline scan expects to output
+                # specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
+                # implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
+                virtualPath = dig(entry, "metadata", "virtualPath")
+                # package = dig(entry, "metadata", "pomProperties", "artifactId")
+                # if not package:
+                #     package = "%s-%s" % (dig(entry, "name"), dig(entry, "version"))
+                #
+                # extra = {
+                #     "implementation-version": implVersion,
+                #     "specification-version": specVersion,
+                #     "location": virtualPath,
+                #     "package": package,
+                # }
+
+                # we need to use the virtual path instead of the name to account for nested dependencies with the same
+                # package name (but potentially different metadata)
+                name = virtualPath
+            elif pkg_type == "apkg":
+                version = entry["version"].split("-")[0]
+
             pkg = utils.package.Package(
-                name=entry["name"],
+                name=name,
                 type=pkg_type,
             )
             packages.add(pkg)
 
-            if "java" in pkg_type:
-                # lets match what inline scan expects to output
-                path = dig(entry, "locations", 0, "path")
-                specVendor = dig(entry, "metadata", "manifest", "specificationVendor")
-                implVendor = dig(entry, "metadata", "manifest", "implementationVendor")
-                specVersion = dig(entry, "metadata", "manifest", "specificationVersion") or None
-                implVersion = dig(entry, "metadata", "manifest", "implementationVersion") or None
-
-                extra = {
-                    "implementation-version": implVersion,
-                    "specification-version": specVersion,
-                    "origin": dig(entry, "metadata", "pomProperties", "groupId"),
-                    "location": path,
-                    "package": dig(entry, "metadata", "pomProperties", "artifactId"),
-                }
-
-                if dig(entry, "metadata", "parentPackage"):
-                    extra['origin'] = dig(entry, "metadata", "pomProperties", "groupId")
-                else:
-                    # this is a nested package...
-                    if specVendor:
-                        extra['origin'] = specVendor
-                    elif implVendor:
-                        extra['origin'] = implVendor
-
-                pomPath = dig(entry, "metadata", "pomProperties", "Path")
-                if path and pomPath:
-                    extra["location"] = "%s:%s" % (path, pomPath),
-
-                # temp temp temp
-                extra.pop("location")
-
-            elif pkg_type == "apkg":
-                entry["version"] = entry["version"].split("-")[0]
-
-            metadata[pkg.type][pkg] = utils.package.Metadata(version=entry["version"], extra=tuple(sorted(extra.items())))
+            metadata[pkg.type][pkg] = utils.package.Metadata(version=version, extra=tuple(sorted(extra.items())))
 
         return utils.package.Info(packages=frozenset(packages), metadata=metadata)
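
Keying java packages on the manifest virtualPath rather than the artifact name is what keeps two nested jars with the same name as separate entries. A standalone sketch of that effect with hypothetical syft JSON entries and a stand-in Package tuple (the real utils.package.Package and dig helper are not shown in this diff):

import collections

# stand-in for utils.package.Package, which hashes on (name, type)
Package = collections.namedtuple("Package", ["name", "type"])

# hypothetical syft entries: the same jar bundled at two different virtual paths
entries = [
    {"name": "guava", "version": "28.0", "type": "java-archive",
     "metadata": {"virtualPath": "/app.jar:lib/guava-28.0.jar"}},
    {"name": "guava", "version": "28.0", "type": "java-archive",
     "metadata": {"virtualPath": "/other.war:WEB-INF/lib/guava-28.0.jar"}},
]

packages = set()
for entry in entries:
    name = entry["name"]
    if "java" in entry["type"]:
        # mirror the diff: key on the virtual path, not the artifact name
        name = entry["metadata"]["virtualPath"]
    packages.add(Package(name=name, type=entry["type"]))

# keyed by name alone these would collapse to one package; keyed by
# virtual path both copies survive
assert len(packages) == 2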