fix: improved Python binary detection (#1648)

This commit is contained in:
Keith Zantow 2023-03-07 10:52:29 -05:00 committed by GitHub
parent 096d2b7bff
commit 7714bc0521
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 299 additions and 100 deletions

View File

@ -54,6 +54,13 @@ jobs:
path: syft/pkg/cataloger/golang/test-fixtures/archs/binaries
key: ${{ runner.os }}-unit-go-binaries-cache-${{ hashFiles( 'syft/pkg/cataloger/golang/test-fixtures/archs/binaries.fingerprint' ) }}
- name: Restore binary cataloger test-fixture cache
id: unit-binary-cataloger-cache
uses: actions/cache@v3
with:
path: syft/pkg/cataloger/binary/test-fixtures/classifiers/dynamic
key: ${{ runner.os }}-unit-binary-cataloger-cache-${{ hashFiles( 'syft/pkg/cataloger/binary/test-fixtures/cache.fingerprint' ) }}
- name: Run unit tests
run: make unit

View File

@ -189,6 +189,10 @@ fingerprints:
cd test/integration/test-fixtures && \
make cache.fingerprint
# for BINARY test fixtures
cd syft/pkg/cataloger/binary/test-fixtures && \
make cache.fingerprint
# for JAVA BUILD test fixtures
cd syft/pkg/cataloger/java/test-fixtures/java-builds && \
make packages.fingerprint
@ -214,6 +218,7 @@ fixtures:
$(call title,Generating test fixtures)
cd syft/pkg/cataloger/java/test-fixtures/java-builds && make
cd syft/pkg/cataloger/rpm/test-fixtures && make
cd syft/pkg/cataloger/binary/test-fixtures && make
.PHONY: show-test-image-cache
show-test-image-cache: ## Show all docker and image tar cache

View File

@ -74,12 +74,7 @@ func catalog(resolver source.FileResolver, cls classifier) (packages []pkg.Packa
return nil, err
}
for _, location := range locations {
reader, err := resolver.FileContentsByLocation(location)
if err != nil {
return nil, err
}
locationReader := source.NewLocationReadCloser(location, reader)
pkgs, err := cls.EvidenceMatcher(cls, locationReader)
pkgs, err := cls.EvidenceMatcher(resolver, cls, location)
if err != nil {
return nil, err
}

View File

@ -4,8 +4,11 @@ import (
"errors"
"fmt"
"io"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -68,25 +71,6 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
Metadata: metadata("postgresql-binary"),
},
},
{
name: "positive-python-duplicates",
fixtureDir: "test-fixtures/classifiers/positive/python-duplicates",
expected: pkg.Package{
Name: "python",
Version: "3.8.16",
Type: "binary",
PURL: "pkg:generic/python@3.8.16",
Locations: locations("dir/python3.8", "python3.8", "libpython3.8.so", "patchlevel.h"),
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
match("python-binary", "dir/python3.8"),
match("python-binary", "python3.8"),
match("python-binary-lib", "libpython3.8.so"),
match("cpython-source", "patchlevel.h"),
},
},
},
},
{
name: "positive-traefik-2.9.6",
fixtureDir: "test-fixtures/classifiers/positive/traefik-2.9.6",
@ -314,26 +298,82 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
Metadata: metadata("python-binary-lib"),
},
},
{
name: "positive-python-3.11.2-from-shared-lib",
fixtureDir: "test-fixtures/classifiers/dynamic/python-binary-shared-lib-3.11",
expected: pkg.Package{
Name: "python",
Version: "3.11.2",
PURL: "pkg:generic/python@3.11.2",
Locations: locations("python3", "libpython3.11.so.1.0"),
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3"),
match("python-binary", "libpython3.11.so.1.0"),
match("python-binary-lib", "libpython3.11.so.1.0"),
},
},
},
},
{
name: "positive-python-3.9-from-shared-redhat-lib",
fixtureDir: "test-fixtures/classifiers/dynamic/python-binary-shared-lib-redhat-3.9",
expected: pkg.Package{
Name: "python",
Version: "3.9.13",
PURL: "pkg:generic/python@3.9.13",
Locations: locations("python3.9", "libpython3.9.so.1.0"),
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3.9"),
match("python-binary", "libpython3.9.so.1.0"),
match("python-binary-lib", "libpython3.9.so.1.0"),
},
},
},
},
{
name: "positive-python-binary-with-version-3.9",
fixtureDir: "test-fixtures/classifiers/dynamic/python-binary-with-version-3.9",
expected: pkg.Package{
Name: "python",
Version: "3.9.2",
PURL: "pkg:generic/python@3.9.2",
Locations: locations("python3.9"),
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
match("python-binary", "python3.9"),
},
},
},
},
{
name: "positive-python3.6",
fixtureDir: "test-fixtures/classifiers/positive/python-binary-3.6",
expected: pkg.Package{
Name: "python",
Version: "3.6.3a-vZ9",
PURL: "pkg:generic/python@3.6.3a-vZ9",
Version: "3.6.3",
PURL: "pkg:generic/python@3.6.3",
Locations: locations("python3.6"),
Metadata: metadata("python-binary"),
},
},
{
name: "positive-patchlevel.h",
fixtureDir: "test-fixtures/classifiers/positive/python-source-3.9",
name: "positive-python-duplicates",
fixtureDir: "test-fixtures/classifiers/positive/python-duplicates",
expected: pkg.Package{
Name: "python",
Version: "3.9-aZ5",
PURL: "pkg:generic/python@3.9-aZ5",
Locations: locations("patchlevel.h"),
Metadata: metadata("cpython-source"),
Version: "3.8.16",
Type: "binary",
PURL: "pkg:generic/python@3.8.16",
Locations: locations("dir/python3.8", "python3.8", "libpython3.8.so"),
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
match("python-binary", "dir/python3.8"),
match("python-binary", "python3.8"),
match("python-binary-lib", "libpython3.8.so"),
},
},
},
},
{
@ -491,17 +531,6 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
require.NoError(t, err)
for _, p := range packages {
expectedLocations := test.expected.Locations.ToSlice()
gotLocations := p.Locations.ToSlice()
require.Len(t, gotLocations, len(expectedLocations))
for i, expectedLocation := range expectedLocations {
gotLocation := gotLocations[i]
if expectedLocation.RealPath != gotLocation.RealPath {
t.Fatalf("locations do not match; expected: %v got: %v", expectedLocations, gotLocations)
}
}
assertPackagesAreEqual(t, test.expected, p)
}
})
@ -611,6 +640,21 @@ func match(classifier string, paths ...string) pkg.ClassifierMatch {
}
func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) {
var failMessages []string
expectedLocations := expected.Locations.ToSlice()
gotLocations := p.Locations.ToSlice()
if len(expectedLocations) != len(gotLocations) {
failMessages = append(failMessages, "locations are not equal length")
} else {
for i, expectedLocation := range expectedLocations {
gotLocation := gotLocations[i]
if expectedLocation.RealPath != gotLocation.RealPath {
failMessages = append(failMessages, fmt.Sprintf("locations do not match; expected: %v got: %v", expectedLocation.RealPath, gotLocation.RealPath))
}
}
}
m1 := expected.Metadata.(pkg.BinaryMetadata).Matches
m2 := p.Metadata.(pkg.BinaryMetadata).Matches
matches := true
@ -633,17 +677,26 @@ func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) {
} else {
matches = false
}
if !matches {
failMessages = append(failMessages, "classifier matches not equal")
}
if expected.Name != p.Name ||
expected.Version != p.Version ||
expected.PURL != p.PURL ||
!matches {
assert.Failf(t, "packages not equal", "%v != %v", stringifyPkg(expected), stringifyPkg(p))
expected.PURL != p.PURL {
failMessages = append(failMessages, "packages do not match")
}
}
func stringifyPkg(p pkg.Package) string {
matches := p.Metadata.(pkg.BinaryMetadata).Matches
return fmt.Sprintf("(name=%s, version=%s, purl=%s, matches=%+v)", p.Name, p.Version, p.PURL, matches)
if len(failMessages) > 0 {
assert.Failf(t, strings.Join(failMessages, "; "), "diff: %s",
cmp.Diff(expected, p,
cmp.Transformer("Locations", func(l source.LocationSet) []source.Location {
return l.ToSlice()
}),
cmpopts.IgnoreUnexported(pkg.Package{}, source.Location{}),
cmpopts.IgnoreFields(pkg.Package{}, "CPEs", "FoundBy", "MetadataType", "Type"),
))
}
}
type panicyResolver struct {

View File

@ -2,6 +2,9 @@ package binary
import (
"bytes"
"debug/elf"
"debug/macho"
"debug/pe"
"fmt"
"io"
"reflect"
@ -10,6 +13,7 @@ import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
@ -49,12 +53,12 @@ type classifier struct {
}
// evidenceMatcher is a function called to catalog Packages that match some sort of evidence
type evidenceMatcher func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error)
type evidenceMatcher func(resolver source.FileResolver, classifier classifier, location source.Location) ([]pkg.Package, error)
func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher {
return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
return func(resolver source.FileResolver, classifier classifier, location source.Location) ([]pkg.Package, error) {
for _, matcher := range matchers {
match, err := matcher(classifier, reader)
match, err := matcher(resolver, classifier, location)
if err != nil {
return nil, err
}
@ -68,12 +72,12 @@ func evidenceMatchers(matchers ...evidenceMatcher) evidenceMatcher {
func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
pat := regexp.MustCompile(fileNamePattern)
return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
if !pat.MatchString(reader.RealPath) {
return func(resolver source.FileResolver, classifier classifier, location source.Location) ([]pkg.Package, error) {
if !pat.MatchString(location.RealPath) {
return nil, nil
}
filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, reader.RealPath)
filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, location.RealPath)
tmpl, err := template.New("").Parse(contentTemplate)
if err != nil {
@ -91,26 +95,70 @@ func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate stri
return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
}
contents, err := getContents(reader)
contents, err := getContents(resolver, location)
if err != nil {
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
}
matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
return singlePackage(classifier, reader, matchMetadata), nil
return singlePackage(classifier, location, matchMetadata), nil
}
}
func fileContentsVersionMatcher(pattern string) evidenceMatcher {
pat := regexp.MustCompile(pattern)
return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
contents, err := getContents(reader)
return func(resolver source.FileResolver, classifier classifier, location source.Location) ([]pkg.Package, error) {
contents, err := getContents(resolver, location)
if err != nil {
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
}
matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
return singlePackage(classifier, reader, matchMetadata), nil
return singlePackage(classifier, location, matchMetadata), nil
}
}
// sharedLibraryLookup builds an evidenceMatcher that derives packages for a binary from the shared
// libraries it imports. Each imported library name that matches sharedLibraryPattern is searched for
// with the resolver (glob "**/<name>") and every file found is passed to sharedLibraryMatcher. The
// packages that come back are re-attributed to the importing binary: the binary's location is put
// into the package's location set together with the library's locations, and a classifier match for
// the binary is placed in front of the matches already recorded in the package metadata.
//
//nolint:gocognit
func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher evidenceMatcher) evidenceMatcher {
	libraryName := regexp.MustCompile(sharedLibraryPattern)

	return func(resolver source.FileResolver, classifier classifier, location source.Location) ([]pkg.Package, error) {
		imported, err := sharedLibraries(resolver, location)
		if err != nil {
			return nil, err
		}

		// the match entry that ties every discovered package back to the importing binary
		binaryMatch := pkg.ClassifierMatch{
			Classifier: classifier.Class,
			Location:   location,
		}

		var found []pkg.Package
		for _, name := range imported {
			if !libraryName.MatchString(name) {
				continue
			}

			candidates, err := resolver.FilesByGlob("**/" + name)
			if err != nil {
				return nil, err
			}

			for _, candidate := range candidates {
				libraryPkgs, err := sharedLibraryMatcher(resolver, classifier, candidate)
				if err != nil {
					return nil, err
				}

				for _, p := range libraryPkgs {
					// seed the set with the source binary, then add the library's own locations
					locs := source.NewLocationSet(location)
					locs.Add(p.Locations.ToSlice()...)
					p.Locations = locs

					// when the metadata is not BinaryMetadata the assertion yields a zero value,
					// so the binary's match ends up being the only one
					libraryMeta, _ := p.Metadata.(pkg.BinaryMetadata)
					matches := make([]pkg.ClassifierMatch, 0, len(libraryMeta.Matches)+1)
					matches = append(matches, binaryMatch)
					matches = append(matches, libraryMeta.Matches...)
					p.Metadata = pkg.BinaryMetadata{Matches: matches}

					found = append(found, p)
				}
			}
		}

		return found, nil
	}
}
@ -122,7 +170,7 @@ func mustPURL(purl string) packageurl.PackageURL {
return p
}
func singlePackage(classifier classifier, reader source.LocationReadCloser, matchMetadata map[string]string) []pkg.Package {
func singlePackage(classifier classifier, location source.Location, matchMetadata map[string]string) []pkg.Package {
version, ok := matchMetadata["version"]
if !ok {
return nil
@ -140,7 +188,7 @@ func singlePackage(classifier classifier, reader source.LocationReadCloser, matc
p := pkg.Package{
Name: classifier.Package,
Version: version,
Locations: source.NewLocationSet(reader.Location),
Locations: source.NewLocationSet(location),
Type: pkg.BinaryPkg,
CPEs: cpes,
FoundBy: catalogerName,
@ -149,7 +197,7 @@ func singlePackage(classifier classifier, reader source.LocationReadCloser, matc
Matches: []pkg.ClassifierMatch{
{
Classifier: classifier.Class,
Location: reader.Location,
Location: location,
},
},
},
@ -174,8 +222,13 @@ func singlePackage(classifier classifier, reader source.LocationReadCloser, matc
return []pkg.Package{p}
}
func getContents(reader source.LocationReadCloser) ([]byte, error) {
unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
func getContents(resolver source.FileResolver, location source.Location) ([]byte, error) {
reader, err := resolver.FileContentsByLocation(location)
if err != nil {
return nil, err
}
unionReader, err := unionreader.GetUnionReader(reader)
if err != nil {
return nil, fmt.Errorf("unable to get union reader for file: %w", err)
}
@ -195,3 +248,43 @@ func singleCPE(cpeString string) []cpe.CPE {
cpe.Must(cpeString),
}
}
// sharedLibraries reads the file at the given location and returns the names of the libraries it
// imports. The contents are probed as ELF, then Mach-O, then Windows PE; the first format that
// parses decides the result. A file that parses as none of them yields (nil, nil). Only a failure
// to read the file contents is returned as an error: a failure to list the imports of a recognized
// binary is logged at debug level and whatever was returned is passed through.
func sharedLibraries(resolver source.FileResolver, location source.Location) ([]string, error) {
	data, err := getContents(resolver, location)
	if err != nil {
		return nil, err
	}

	// one in-memory reader is shared by all three probes
	ra := bytes.NewReader(data)

	// parse errors are intentionally dropped: a nil file just means "not this format"
	if elfFile, _ := elf.NewFile(ra); elfFile != nil {
		libs, err := elfFile.ImportedLibraries()
		if err != nil {
			log.Debugf("unable to read elf binary at: %s -- %s", location.RealPath, err)
		}
		return libs, nil
	}

	if machoFile, _ := macho.NewFile(ra); machoFile != nil {
		libs, err := machoFile.ImportedLibraries()
		if err != nil {
			log.Debugf("unable to read macho binary at: %s -- %s", location.RealPath, err)
		}
		return libs, nil
	}

	if peFile, _ := pe.NewFile(ra); peFile != nil {
		libs, err := peFile.ImportedLibraries()
		if err != nil {
			log.Debugf("unable to read pe binary at: %s -- %s", location.RealPath, err)
		}
		return libs, nil
	}

	return nil, nil
}

View File

@ -67,10 +67,8 @@ func Test_ClassifierCPEs(t *testing.T) {
locations, err := resolver.FilesByPath(test.fixture)
require.NoError(t, err)
require.Len(t, locations, 1)
location := locations[0]
readCloser, err := resolver.FileContentsByLocation(location)
require.NoError(t, err)
pkgs, err := test.classifier.EvidenceMatcher(test.classifier, source.NewLocationReadCloser(location, readCloser))
pkgs, err := test.classifier.EvidenceMatcher(resolver, test.classifier, locations[0])
require.NoError(t, err)
require.Len(t, pkgs, 1)

View File

@ -9,9 +9,16 @@ var defaultClassifiers = []classifier{
{
Class: "python-binary",
FileGlob: "**/python*",
EvidenceMatcher: fileNameTemplateVersionMatcher(
`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`,
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
EvidenceMatcher: evidenceMatchers(
// try to find version information from libpython shared libraries
sharedLibraryLookup(
`^libpython[0-9]+(?:\.[0-9]+)+\.so.*$`,
libpythonMatcher),
// check for version information in the binary
fileNameTemplateVersionMatcher(
`(?:.*/|^)python(?P<version>[0-9]+(?:\.[0-9]+)+)$`,
pythonVersionTemplate),
),
Package: "python",
PURL: mustPURL("pkg:generic/python@version"),
CPEs: []cpe.CPE{
@ -22,21 +29,7 @@ var defaultClassifiers = []classifier{
{
Class: "python-binary-lib",
FileGlob: "**/libpython*.so*",
EvidenceMatcher: fileNameTemplateVersionMatcher(
`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`,
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
Package: "python",
PURL: mustPURL("pkg:generic/python@version"),
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:python_software_foundation:python:*:*:*:*:*:*:*:*"),
cpe.Must("cpe:2.3:a:python:python:*:*:*:*:*:*:*:*"),
},
},
{
Class: "cpython-source",
FileGlob: "**/patchlevel.h",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`),
EvidenceMatcher: libpythonMatcher,
Package: "python",
PURL: mustPURL("pkg:generic/python@version"),
CPEs: []cpe.CPE{
@ -228,3 +221,11 @@ var defaultClassifiers = []classifier{
CPEs: singleCPE("cpe:2.3:a:rust-lang:rust:*:*:*:*:*:*:*:*"),
},
}
var (
	// pythonVersionTemplate is the content-pattern template shared by the python classifiers. In both
	// binaries and shared libraries the version appears NUL-delimited, e.g. [NUL]3.11.2[NUL]. The
	// {{ .version }} placeholder is filled in by fileNameTemplateVersionMatcher (presumably with the
	// "version" group captured from the file name -- confirm against that function).
	pythonVersionTemplate = `(?m)\x00(?P<version>{{ .version }}[-._a-zA-Z0-9]*)\x00`

	// libpythonMatcher matches libpython<version>.so* files by name and searches their contents using
	// pythonVersionTemplate.
	libpythonMatcher = fileNameTemplateVersionMatcher(
		`(?:.*/|^)libpython(?P<version>[0-9]+(?:\.[0-9]+)+)\.so.*$`,
		pythonVersionTemplate,
	)
)

View File

@ -0,0 +1 @@
classifiers/dynamic

View File

@ -0,0 +1,38 @@
# Test fixtures for the binary cataloger that are extracted from container images rather than
# checked in. Every fixture target below is a directory under classifiers/dynamic; `all` builds
# them all and `clean` removes the whole classifiers/dynamic tree.
.PHONY: all
all: \
classifiers/dynamic/python-binary-shared-lib-3.11 \
classifiers/dynamic/python-binary-shared-lib-redhat-3.9 \
classifiers/dynamic/python-binary-with-version-3.9
# Each fixture rule stores its digest-pinned image reference in a per-target variable
# ("<target>_image", set through $(eval ...)) and calls get-image-file.sh once per file with:
# the image, the path inside the image, and the destination path under the target directory.
#
# python 3.11 binary plus the libpython shared library it is shipped with; note that the
# binary /usr/local/bin/python3.11 is stored under the name "python3".
classifiers/dynamic/python-binary-shared-lib-3.11:
$(eval $@_image := "python:3.11-slim@sha256:0b106e1d2bf485c2a41474bc9cd5103e9eea4e179f40f10741b53b127059221e")
./get-image-file.sh $($@_image) \
/usr/local/bin/python3.11 \
$@/python3
./get-image-file.sh $($@_image) \
/usr/local/lib/libpython3.11.so.1.0 \
$@/libpython3.11.so.1.0
# python 3.9 binary and libpython shared library taken from the Red Hat UBI8 python-39 image
classifiers/dynamic/python-binary-shared-lib-redhat-3.9:
$(eval $@_image := "registry.access.redhat.com/ubi8/python-39@sha256:f3cf958b96ce016b63e3e163e488f52e42891304dafef5a0811563f22e3cbad0")
./get-image-file.sh $($@_image) \
/usr/bin/python3.9 \
$@/python3.9
./get-image-file.sh $($@_image) \
/usr/lib64/libpython3.9.so.1.0 \
$@/libpython3.9.so.1.0
# python 3.9 binary only (no shared library is extracted for this fixture)
classifiers/dynamic/python-binary-with-version-3.9:
$(eval $@_image := "python:3.9.16-bullseye@sha256:93fb93c461a2e47a2176706fad1f39eaacd5dd40e19c0b018699a28c03eb2e2a")
./get-image-file.sh $($@_image) \
/usr/bin/python3.9 \
$@/python3.9
# remove every extracted fixture
.PHONY: clean
clean:
rm -rf classifiers/dynamic
# cache.fingerprint (for CI): write one md5 digest summarizing the contents of every file under
# classifiers/dynamic into ./cache.fingerprint; the CI workflow hashes that file to build its
# cache key. The sorted per-file digests are echoed to stderr for debugging.
#
# Notes on the recipe:
#  - `$$1` is required so that awk (not make) receives `$1`; a single `$` is expanded by make to
#    an empty string, which makes awk print whole lines instead of only the digest column.
#  - tee's stdout is discarded so the fingerprint file is written exactly once instead of being
#    opened both by tee and by an append redirection.
#  - `title` is invoked through `call`, as the top-level Makefile does; when `title` is not
#    defined in this Makefile the line expands to nothing.
.PHONY: cache.fingerprint
cache.fingerprint: # for CI
	$(call title,Install test fixture fingerprint)
	@find ./classifiers/dynamic/* -type f -exec md5sum {} + | awk '{print $$1}' | sort | tee /dev/stderr | md5sum | tee cache.fingerprint > /dev/null

View File

@ -1,7 +0,0 @@
# note: this SHOULD match as python 3.9
some source code...
#define PY_VERSION 3.9-aZ5
more source!

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Copy a single file out of a container image.
#
# Usage: get-image-file.sh IMAGE PATH_IN_IMAGE DEST_PATH
#   $1  image reference passed to `docker create`
#   $2  path of the file inside the container
#   $3  destination path on the host; its parent directory is created when missing
#
# All expansions are quoted so that arguments containing spaces or glob characters are passed
# through unchanged.
set -uxe

# create a container from the image so its filesystem can be read with `docker cp`
CTRID=$(docker create "$1")

# remove the temporary container whenever the script exits
function cleanup() {
  docker rm "${CTRID}"
}

trap cleanup EXIT

# from here on a failing command no longer aborts the script immediately
set +e

mkdir -p "$(dirname "$3")"
docker cp "${CTRID}:$2" "$3"