feat: Generic Binary Cataloger (#1336)

This commit is contained in:
Keith Zantow 2022-11-29 18:28:10 -05:00 committed by GitHub
parent 7a69e2129b
commit 4f39287216
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 579 additions and 740 deletions

View File

@ -22,7 +22,6 @@ func Tasks(app *config.Application) ([]Task, error) {
generateCatalogFileMetadataTask,
generateCatalogFileDigestsTask,
generateCatalogSecretsTask,
generateCatalogFileClassificationsTask,
generateCatalogContentsTask,
}
@ -162,34 +161,6 @@ func generateCatalogSecretsTask(app *config.Application) (Task, error) {
return task, nil
}
// generateCatalogFileClassificationsTask builds the task that runs the file classification
// cataloger with the default classifiers and records the outcome on the SBOM artifacts.
// A nil task and nil error are returned when the cataloger is disabled in the configuration.
func generateCatalogFileClassificationsTask(app *config.Application) (Task, error) {
	if enabled := app.FileClassification.Cataloger.Enabled; !enabled {
		return nil, nil
	}

	// TODO: in the future we could expose out the classifiers via configuration
	cataloger, err := file.NewClassificationCataloger(file.DefaultClassifiers)
	if err != nil {
		return nil, err
	}

	return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
		// the scope option is read from the application config each time the task runs
		resolver, err := src.FileResolver(app.FileClassification.Cataloger.ScopeOpt)
		if err != nil {
			return nil, err
		}

		classifications, err := cataloger.Catalog(resolver)
		if err != nil {
			return nil, err
		}

		results.FileClassifications = classifications
		return nil, nil
	}, nil
}
func generateCatalogContentsTask(app *config.Application) (Task, error) {
if !app.FileContents.Cataloger.Enabled {
return nil, nil

View File

@ -1,38 +0,0 @@
package file
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// ClassificationCataloger runs a fixed list of classifiers against the files exposed by a
// file resolver (see Catalog).
type ClassificationCataloger struct {
// classifiers is the list of classifiers applied to every file.
classifiers []Classifier
}
// NewClassificationCataloger creates a ClassificationCataloger that applies the given
// classifiers. The returned error is currently always nil.
func NewClassificationCataloger(classifiers []Classifier) (*ClassificationCataloger, error) {
	cataloger := &ClassificationCataloger{classifiers: classifiers}
	return cataloger, nil
}
// Catalog applies every configured classifier to every location returned by allRegularFiles
// and groups the resulting classifications by the coordinates of the file they were found in.
// A classifier failure is logged as a warning and skipped; the returned error is always nil.
func (i *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]Classification, error) {
	found := make(map[source.Coordinates][]Classification)
	total := 0

	for _, loc := range allRegularFiles(resolver) {
		for _, c := range i.classifiers {
			classification, err := c.Classify(resolver, loc)
			switch {
			case err != nil:
				log.Warnf("file classification cataloger failed with class=%q at location=%+v: %+v", c.Class, loc, err)
			case classification != nil:
				found[loc.Coordinates] = append(found[loc.Coordinates], *classification)
				total++
			}
		}
	}

	log.Debugf("file classifier discovered %d results", total)
	return found, nil
}

View File

@ -1,210 +0,0 @@
package file
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft/source"
)
// TestClassifierCataloger_DefaultClassifiers_PositiveCases catalogs a fixture directory with
// DefaultClassifiers and checks that the classifications recorded for the expected file
// location match the expected values exactly.
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
tests := []struct {
name string
fixtureDir string
location string
expected []Classification
expectedErr func(assert.TestingT, error, ...interface{}) bool
}{
{
name: "positive-libpython3.7.so",
fixtureDir: "test-fixtures/classifiers/positive",
location: "libpython3.7.so",
expected: []Classification{
{
Class: "python-binary",
Metadata: map[string]string{
"version": "3.7.4a-vZ9",
},
},
},
expectedErr: assert.NoError,
},
{
name: "positive-python3.6",
fixtureDir: "test-fixtures/classifiers/positive",
location: "python3.6",
expected: []Classification{
{
Class: "python-binary",
Metadata: map[string]string{
"version": "3.6.3a-vZ9",
},
},
},
expectedErr: assert.NoError,
},
{
name: "positive-patchlevel.h",
fixtureDir: "test-fixtures/classifiers/positive",
location: "patchlevel.h",
expected: []Classification{
{
Class: "cpython-source",
Metadata: map[string]string{
"version": "3.9-aZ5",
},
},
},
expectedErr: assert.NoError,
},
{
name: "positive-go",
fixtureDir: "test-fixtures/classifiers/positive",
location: "go",
expected: []Classification{
{
Class: "go-binary",
Metadata: map[string]string{
"version": "1.14",
},
},
},
expectedErr: assert.NoError,
},
{
name: "positive-go-hint",
fixtureDir: "test-fixtures/classifiers/positive",
location: "VERSION",
expected: []Classification{
{
Class: "go-binary-hint",
Metadata: map[string]string{
"version": "1.15",
},
},
},
expectedErr: assert.NoError,
},
{
name: "positive-busybox",
fixtureDir: "test-fixtures/classifiers/positive",
location: "[", // note: busybox is a link to [
expected: []Classification{
{
Class: "busybox-binary",
VirtualPath: "busybox",
Metadata: map[string]string{
"version": "3.33.3",
},
},
},
expectedErr: assert.NoError,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c, err := NewClassificationCataloger(DefaultClassifiers)
test.expectedErr(t, err)
src, err := source.NewFromDirectory(test.fixtureDir)
test.expectedErr(t, err)
resolver, err := src.FileResolver(source.SquashedScope)
test.expectedErr(t, err)
actualResults, err := c.Catalog(resolver)
test.expectedErr(t, err)
// the whole fixture directory is cataloged; only the entry whose real path equals
// test.location is compared against the expectation
ok := false
for actualLoc, actualClassification := range actualResults {
if test.location == actualLoc.RealPath {
ok = true
assert.Equal(t, test.expected, actualClassification)
}
}
// a missing entry for test.location is itself a failure
if !ok {
t.Fatalf("could not find test location=%q", test.location)
}
})
}
}
// TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image runs DefaultClassifiers
// against a docker-archive fixture image (squashed scope) and checks the classifications
// recorded for the expected real path.
func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) {
tests := []struct {
name string
fixtureImage string
location string
expected []Classification
expectedErr func(assert.TestingT, error, ...interface{}) bool
}{
{
name: "busybox-regression",
fixtureImage: "image-busybox",
location: "/bin/[",
expected: []Classification{
{
Class: "busybox-binary",
VirtualPath: "/bin/busybox",
Metadata: map[string]string{
"version": "1.35.0",
},
},
},
expectedErr: assert.NoError,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c, err := NewClassificationCataloger(DefaultClassifiers)
test.expectedErr(t, err)
img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage)
src, err := source.NewFromImage(img, "test-img")
test.expectedErr(t, err)
resolver, err := src.FileResolver(source.SquashedScope)
test.expectedErr(t, err)
actualResults, err := c.Catalog(resolver)
test.expectedErr(t, err)
// only the result keyed by the expected real path is compared
ok := false
for actuaLoc, actualClassification := range actualResults {
if actuaLoc.RealPath == test.location {
ok = true
assert.Equal(t, test.expected, actualClassification)
}
}
// a missing entry for test.location is itself a failure
if !ok {
t.Fatalf("could not find test location=%q", test.location)
}
})
}
}
// TestClassifierCataloger_DefaultClassifiers_NegativeCases checks that DefaultClassifiers
// produce no classifications at all for the negative fixture directory.
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
	cataloger, err := NewClassificationCataloger(DefaultClassifiers)
	assert.NoError(t, err)

	negativeSrc, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
	assert.NoError(t, err)

	resolver, err := negativeSrc.FileResolver(source.SquashedScope)
	assert.NoError(t, err)

	classifications, err := cataloger.Catalog(resolver)
	assert.NoError(t, err)
	assert.Empty(t, classifications)
}

View File

@ -1,153 +0,0 @@
package file
import (
"bytes"
"fmt"
"io"
"regexp"
"text/template"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/source"
)
// DefaultClassifiers is the built-in list of classifiers. Each entry pairs file path patterns
// with evidence pattern templates; Classifier.Classify renders the templates with the named
// groups captured from the path and matches the result against the file contents.
var DefaultClassifiers = []Classifier{
{
Class: "python-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`),
regexp.MustCompile(`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`),
},
EvidencePatternTemplates: []string{
// {{ .version }} is filled in with the major.minor version captured from the file path
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`,
},
},
{
Class: "cpython-source",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)patchlevel.h$`),
},
EvidencePatternTemplates: []string{
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`,
},
},
{
Class: "go-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)go$`),
},
EvidencePatternTemplates: []string{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "nodejs-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatternTemplates: []string{
// regex that matches node.js/vx.y.z
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
},
},
{
Class: "go-binary-hint",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)VERSION$`),
},
EvidencePatternTemplates: []string{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "busybox-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)busybox$`),
},
EvidencePatternTemplates: []string{
`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
},
},
}
// Classifier describes how to recognize one class of file: by its path and by evidence
// found in its contents.
type Classifier struct {
// Class is the label copied onto every Classification this classifier produces.
Class string
// FilepathPatterns are matched against the real and virtual path of a location; a file is
// only inspected when at least one pattern matches.
FilepathPatterns []*regexp.Regexp
// EvidencePatternTemplates are text/template strings that are rendered with the named
// groups captured from the file path and then compiled as regular expressions that are
// matched against the file contents.
EvidencePatternTemplates []string
}
// Classification is the result produced by Classifier.Classify for a matching file.
type Classification struct {
// Class is the Class of the classifier that matched.
Class string `json:"class"`
// VirtualPath is the virtual path of the location that was classified.
VirtualPath string `json:"virtual_path"`
// Metadata holds the named capture groups found in the file contents.
Metadata map[string]string `json:"metadata"`
}
// Classify inspects the file at the given location. When none of the classifier's file path
// patterns match the location, or no evidence pattern matches the file contents, it returns
// (nil, nil). Otherwise every evidence template is rendered with the named groups captured
// from the path, compiled, and matched against the full contents; the named groups of all
// matching patterns are merged into a single Classification (later patterns overwrite
// earlier keys).
func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
	matched, pathGroups := FilepathMatches(c.FilepathPatterns, location)
	if !matched {
		return nil, nil
	}

	reader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer internal.CloseAndLogError(reader, location.VirtualPath)

	// TODO: there is room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
	contents, err := io.ReadAll(reader)
	if err != nil {
		return nil, err
	}

	var classification *Classification
	for _, evidenceTemplate := range c.EvidencePatternTemplates {
		tmpl, err := template.New("").Parse(evidenceTemplate)
		if err != nil {
			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", evidenceTemplate, err)
		}

		var rendered bytes.Buffer
		if err = tmpl.Execute(&rendered, pathGroups); err != nil {
			return nil, fmt.Errorf("unable to render template: %w", err)
		}

		evidencePattern, err := regexp.Compile(rendered.String())
		if err != nil {
			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", rendered.String(), err)
		}

		if !evidencePattern.Match(contents) {
			continue
		}

		groups := internal.MatchNamedCaptureGroups(evidencePattern, string(contents))
		if classification != nil {
			// a previous template already matched: fold the new groups into its metadata
			for name, value := range groups {
				classification.Metadata[name] = value
			}
			continue
		}

		classification = &Classification{
			Class:       c.Class,
			VirtualPath: location.VirtualPath,
			Metadata:    groups,
		}
	}

	return classification, nil
}
// FilepathMatches reports whether any pattern matches the real path or, failing that, the
// virtual path of the location (empty paths are skipped). On the first match it also returns
// the named capture groups of that pattern for the matching path; otherwise it returns
// (false, nil).
func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
	candidates := [...]string{location.RealPath, location.VirtualPath}
	for _, candidate := range candidates {
		if candidate == "" {
			continue
		}
		for _, re := range patterns {
			if !re.MatchString(candidate) {
				continue
			}
			return true, internal.MatchNamedCaptureGroups(re, candidate)
		}
	}
	return false, nil
}

View File

@ -1,97 +0,0 @@
package file
import (
"regexp"
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/source"
)
// TestFilepathMatches exercises FilepathMatches with patterns matched against the real path
// and the virtual path of a location, including anchored patterns and an empty location.
// No case sets expectedNamedGroups, so the returned groups are compared against a nil map.
func TestFilepathMatches(t *testing.T) {
tests := []struct {
name string
location source.Location
patterns []string
expectedMatches bool
expectedNamedGroups map[string]string
}{
{
name: "simple-filename-match",
location: source.Location{
Coordinates: source.Coordinates{
RealPath: "python2.7",
},
},
patterns: []string{
`python([0-9]+\.[0-9]+)$`,
},
expectedMatches: true,
},
{
name: "filepath-match",
location: source.Location{
Coordinates: source.Coordinates{
RealPath: "/usr/bin/python2.7",
},
},
patterns: []string{
`python([0-9]+\.[0-9]+)$`,
},
expectedMatches: true,
},
{
name: "virtual-filepath-match",
location: source.Location{
VirtualPath: "/usr/bin/python2.7",
},
patterns: []string{
`python([0-9]+\.[0-9]+)$`,
},
expectedMatches: true,
},
{
name: "full-filepath-match",
location: source.Location{
VirtualPath: "/usr/bin/python2.7",
},
patterns: []string{
`.*/bin/python([0-9]+\.[0-9]+)$`,
},
expectedMatches: true,
},
{
name: "anchored-filename-match-FAILS",
location: source.Location{
Coordinates: source.Coordinates{
RealPath: "/usr/bin/python2.7",
},
},
patterns: []string{
`^python([0-9]+\.[0-9]+)$`,
},
expectedMatches: false,
},
{
name: "empty-filename-match-FAILS",
location: source.Location{},
patterns: []string{
`^python([0-9]+\.[0-9]+)$`,
},
expectedMatches: false,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// compile the string patterns of the test case into regular expressions
var patterns []*regexp.Regexp
for _, p := range test.patterns {
patterns = append(patterns, regexp.MustCompile(p))
}
actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location)
assert.Equal(t, test.expectedMatches, actualMatches)
assert.Equal(t, test.expectedNamedGroups, actualNamedGroups)
})
}
}

View File

@ -6,12 +6,11 @@ import (
)
type File struct {
ID string `json:"id"`
Location source.Coordinates `json:"location"`
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
Contents string `json:"contents,omitempty"`
Digests []file.Digest `json:"digests,omitempty"`
Classifications []file.Classification `json:"classifications,omitempty"`
ID string `json:"id"`
Location source.Coordinates `json:"location"`
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
Contents string `json:"contents,omitempty"`
Digests []file.Digest `json:"digests,omitempty"`
}
type FileMetadataEntry struct {

View File

@ -102,23 +102,17 @@ func toFile(s sbom.SBOM) []model.File {
digests = digestsForLocation
}
var classifications []file.Classification
if classificationsForLocation, exists := artifacts.FileClassifications[coordinates]; exists {
classifications = classificationsForLocation
}
var contents string
if contentsForLocation, exists := artifacts.FileContents[coordinates]; exists {
contents = contentsForLocation
}
results = append(results, model.File{
ID: string(coordinates.ID()),
Location: coordinates,
Metadata: toFileMetadataEntry(coordinates, metadata),
Digests: digests,
Classifications: classifications,
Contents: contents,
ID: string(coordinates.ID()),
Location: coordinates,
Metadata: toFileMetadataEntry(coordinates, metadata),
Digests: digests,
Contents: contents,
})
}

View File

@ -0,0 +1,78 @@
package binary
import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// catalogerName is the name reported by Cataloger.Name.
const catalogerName = "binary-cataloger"
// NewCataloger returns a new, empty binary Cataloger.
func NewCataloger() *Cataloger {
	return new(Cataloger)
}
// Cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files,
// which have been identified by the classifiers. The Cataloger is _NOT_ a place to catalog any and every
// binary, but rather the specific set that has been curated to be important, predominantly
// toolchain-related runtimes like Python, Go, Java, or Node. Some exceptions can be made for
// widely-used binaries such as busybox.
type Cataloger struct{}
// Name returns a string that uniquely describes the Cataloger (the catalogerName constant).
func (c Cataloger) Name() string {
return catalogerName
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
// after analyzing the catalog source.
//
// For each default classifier, every file selected by the classifier's FileGlob is opened and
// handed to the classifier's EvidenceMatcher. Packages that agree on name, version, language
// and type (see packagesMatch) are consolidated into one package that tracks all locations.
// The returned relationships are always nil. Any resolver or matcher error aborts cataloging
// and is returned unchanged.
func (c Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) {
	var packages []pkg.Package
	var relationships []artifact.Relationship

	// note: the loop variable is deliberately not named "classifier" so it does not shadow the type
	for _, cls := range defaultClassifiers {
		locations, err := resolver.FilesByGlob(cls.FileGlob)
		if err != nil {
			return nil, nil, err
		}
		for _, location := range locations {
			reader, err := resolver.FileContentsByLocation(location)
			if err != nil {
				return nil, nil, err
			}
			locationReader := source.NewLocationReadCloser(location, reader)
			newPkgs, err := cls.EvidenceMatcher(cls, locationReader)
			// The matcher has read everything it needs by the time it returns, so release the
			// file contents now instead of leaking one open reader per candidate file. The
			// close error is intentionally dropped: the contents were only read, so a failed
			// close cannot change the cataloging result.
			_ = reader.Close()
			if err != nil {
				return nil, nil, err
			}
		newPackages:
			for i := range newPkgs {
				newPkg := &newPkgs[i]
				for j := range packages {
					p := &packages[j]
					// consolidate identical packages found in different locations,
					// but continue to track each location
					if packagesMatch(p, newPkg) {
						p.Locations.Add(newPkg.Locations.ToSlice()...)
						continue newPackages
					}
				}
				packages = append(packages, *newPkg)
			}
		}
	}
	return packages, relationships, nil
}
// packagesMatch reports whether two binary packages are considered the same package, i.e.
// they agree on name, version, language and type.
func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool {
	return p1.Name == p2.Name &&
		p1.Version == p2.Version &&
		p1.Language == p2.Language &&
		p1.Type == p2.Type
}

View File

@ -0,0 +1,201 @@
package binary
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/stereoscope/pkg/imagetest"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// TestClassifierCataloger_DefaultClassifiers_PositiveCases catalogs a fixture directory with the
// binary Cataloger and checks the package found at the expected location. Packages are compared
// with assertPackagesAreEqual, i.e. by name, version and classifier only.
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
tests := []struct {
name string
fixtureDir string
expected pkg.Package
}{
{
name: "positive-libpython3.7.so",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "python",
Version: "3.7.4a-vZ9",
Locations: singleLocation("libpython3.7.so"),
Metadata: pkg.BinaryMetadata{
Classifier: "python-binary-lib",
},
},
},
{
name: "positive-python3.6",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "python",
Version: "3.6.3a-vZ9",
Locations: singleLocation("python3.6"),
Metadata: pkg.BinaryMetadata{
Classifier: "python-binary",
},
},
},
{
name: "positive-patchlevel.h",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "python",
Version: "3.9-aZ5",
Locations: singleLocation("patchlevel.h"),
Metadata: pkg.BinaryMetadata{
Classifier: "cpython-source",
},
},
},
{
name: "positive-go",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "go",
Version: "1.14",
Locations: singleLocation("go"),
Metadata: pkg.BinaryMetadata{
Classifier: "go-binary",
},
},
},
{
name: "positive-go-hint",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "go",
Version: "1.15",
Locations: singleLocation("VERSION"),
Metadata: pkg.BinaryMetadata{
Classifier: "go-binary-hint",
},
},
},
{
name: "positive-busybox",
fixtureDir: "test-fixtures/classifiers/positive",
expected: pkg.Package{
Name: "busybox",
Version: "3.33.3",
Locations: singleLocation("["), // note: busybox is a link to [
Metadata: pkg.BinaryMetadata{
Classifier: "busybox-binary",
VirtualPath: "busybox",
},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := NewCataloger()
src, err := source.NewFromDirectory(test.fixtureDir)
require.NoError(t, err)
resolver, err := src.FileResolver(source.SquashedScope)
require.NoError(t, err)
packages, _, err := c.Catalog(resolver)
require.NoError(t, err)
// the whole fixture directory is cataloged; a package is selected by comparing the real
// path of its first location with the first expected location
ok := false
for _, p := range packages {
if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath {
ok = true
assertPackagesAreEqual(t, test.expected, p)
}
}
// no package at the expected location is itself a failure
if !ok {
t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath)
}
})
}
}
// TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image runs the binary Cataloger against
// a docker-archive fixture image (squashed scope) and checks the package found at the expected
// location by name, version and classifier.
func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) {
tests := []struct {
name string
fixtureImage string
expected pkg.Package
}{
{
name: "busybox-regression",
fixtureImage: "image-busybox",
expected: pkg.Package{
Name: "busybox",
Version: "1.35.0",
Locations: singleLocation("/bin/["),
Metadata: pkg.BinaryMetadata{
Classifier: "busybox-binary",
VirtualPath: "/bin/busybox",
},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c := NewCataloger()
img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage)
src, err := source.NewFromImage(img, "test-img")
require.NoError(t, err)
resolver, err := src.FileResolver(source.SquashedScope)
require.NoError(t, err)
packages, _, err := c.Catalog(resolver)
require.NoError(t, err)
// a package is selected by comparing the real path of its first location with the
// first expected location
ok := false
for _, p := range packages {
if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath {
ok = true
assertPackagesAreEqual(t, test.expected, p)
}
}
// no package at the expected location is itself a failure
if !ok {
t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath)
}
})
}
}
// TestClassifierCataloger_DefaultClassifiers_NegativeCases checks that the binary Cataloger
// reports no packages for the negative fixture directory.
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
	c := NewCataloger()

	// Setup failures are fatal (require, as in the positive tests of this file): continuing
	// after a failed step would only exercise the cataloger with unusable inputs.
	src, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
	require.NoError(t, err)

	resolver, err := src.FileResolver(source.SquashedScope)
	require.NoError(t, err)

	actualResults, _, err := c.Catalog(resolver)
	require.NoError(t, err)
	assert.Equal(t, 0, len(actualResults))
}
// singleLocation returns a location set containing only a location created from the path s.
func singleLocation(s string) source.LocationSet {
return source.NewLocationSet(source.NewLocation(s))
}
// assertPackagesAreEqual fails the test when the two packages differ in name, version or
// binary classifier. Other fields (locations, virtual path, ...) are intentionally not compared.
// A package whose metadata is not pkg.BinaryMetadata is reported as a test failure rather than
// panicking on the type assertion.
func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) {
	t.Helper() // report failures at the caller's line

	meta1, ok := expected.Metadata.(pkg.BinaryMetadata)
	if !ok {
		assert.Failf(t, "unexpected metadata type", "expected package has metadata of type %T, want pkg.BinaryMetadata", expected.Metadata)
		return
	}

	meta2, ok := p.Metadata.(pkg.BinaryMetadata)
	if !ok {
		assert.Failf(t, "unexpected metadata type", "actual package has metadata of type %T, want pkg.BinaryMetadata", p.Metadata)
		return
	}

	if expected.Name != p.Name ||
		expected.Version != p.Version ||
		meta1.Classifier != meta2.Classifier {
		assert.Failf(t, "packages not equal", "%v != %v", expected, p)
	}
}

View File

@ -0,0 +1,180 @@
package binary
import (
"bytes"
"fmt"
"io"
"reflect"
"regexp"
"text/template"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
"github.com/anchore/syft/syft/source"
)
// emptyPURL is the zero-value package URL; singlePackage compares a classifier's PURL against
// it to decide whether a PURL was configured.
var emptyPURL = packageurl.PackageURL{}
// classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the evidenceMatcher.
type classifier struct {
// Class is the classifier name recorded in the BinaryMetadata of every package it produces
Class string
// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
FileGlob string
// EvidenceMatcher is what will be used to match against the file in the source
// location. If the matcher returns a package, the file will be considered a candidate.
EvidenceMatcher evidenceMatcher
// Information below is used to specify the Package information when returned
// Package is the name to use for the package
Package string
// Language is the language to classify this package as
Language pkg.Language
// Type is the package type to use for the package
Type pkg.Type
// PURL is the Package URL to use when generating a package
PURL packageurl.PackageURL
// CPEs are the specific CPEs we want to include for this binary with updated version information
CPEs []pkg.CPE
}
// evidenceMatcher is a function called to catalog Packages that match some sort of evidence.
// It receives the classifier being applied and a reader for the candidate file; the matchers
// in this file return (nil, nil) when the file yields no version.
type evidenceMatcher func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error)
// fileNameTemplateVersionMatcher returns an evidenceMatcher for files whose real path matches
// fileNamePattern. The named groups captured from the path are rendered into contentTemplate
// (a text/template producing a regular expression), and the rendered expression is matched
// against the file contents; a "version" group in that match yields a single package.
//
// fileNamePattern must be a valid regular expression (the function panics otherwise). A file
// whose path does not match, or whose contents yield no version, results in (nil, nil). An
// invalid contentTemplate is reported as an error for every file whose path matches.
func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
	pat := regexp.MustCompile(fileNamePattern)

	// The template does not depend on the file, so parse it once instead of once per file.
	// A parse failure is kept and reported from the matcher to preserve the error contract.
	tmpl, tmplErr := template.New("").Parse(contentTemplate)

	return func(cls classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
		if !pat.MatchString(reader.RealPath) {
			return nil, nil
		}
		if tmplErr != nil {
			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, tmplErr)
		}

		// The captured path fragments are spliced into a regular expression, so they must be
		// escaped: without this the "." in a captured "3.7" would match any byte.
		pathGroups := internal.MatchNamedCaptureGroups(pat, reader.RealPath)
		escapedGroups := make(map[string]string, len(pathGroups))
		for name, value := range pathGroups {
			escapedGroups[name] = regexp.QuoteMeta(value)
		}

		patternBuf := &bytes.Buffer{}
		if err := tmpl.Execute(patternBuf, escapedGroups); err != nil {
			return nil, fmt.Errorf("unable to render template: %w", err)
		}

		tmplPattern, err := regexp.Compile(patternBuf.String())
		if err != nil {
			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
		}

		contents, err := getContents(reader)
		if err != nil {
			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
		}

		matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
		if version, ok := matchMetadata["version"]; ok {
			return singlePackage(cls, reader, version), nil
		}
		return nil, nil
	}
}
// fileContentsVersionMatcher returns an evidenceMatcher that matches the given regular
// expression against the full contents of a file. When the match provides a "version" named
// group a single package is returned; otherwise the result is (nil, nil). The pattern must be
// a valid regular expression (the function panics otherwise).
func fileContentsVersionMatcher(pattern string) evidenceMatcher {
	versionPattern := regexp.MustCompile(pattern)

	return func(cls classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
		contents, err := getContents(reader)
		if err != nil {
			return nil, fmt.Errorf("unable to get read contents for file: %w", err)
		}

		groups := internal.MatchNamedCaptureGroups(versionPattern, string(contents))
		if version, found := groups["version"]; found {
			return singlePackage(cls, reader, version), nil
		}
		return nil, nil
	}
}
// mustPURL parses the given package URL string and panics if it is invalid. It is meant for
// package-level classifier definitions, where an invalid PURL is a programming error.
func mustPURL(purl string) packageurl.PackageURL {
	p, err := packageurl.FromString(purl)
	if err != nil {
		// report the offending input and the parse error; p is not meaningful on failure
		panic(fmt.Sprintf("invalid PURL %q: %v", purl, err))
	}
	return p
}
// singlePackage builds the one-element package slice reported for a classifier match.
// The package is named after classifier.Package, carries the detected version, the reader's
// location and a copy of each classifier CPE stamped with that version. Type and language
// default to pkg.BinaryPkg and pkg.Binary unless the classifier overrides them, and a PURL is
// only set when the classifier defines a non-empty one.
func singlePackage(classifier classifier, reader source.LocationReadCloser, version string) []pkg.Package {
	// stamp the detected version onto a copy of every configured CPE
	var versionedCPEs []pkg.CPE
	for i := range classifier.CPEs {
		versioned := classifier.CPEs[i]
		versioned.Version = version
		versionedCPEs = append(versionedCPEs, versioned)
	}

	pkgType := classifier.Type
	if pkgType == "" {
		pkgType = pkg.BinaryPkg
	}

	language := classifier.Language
	if language == "" {
		language = pkg.Binary
	}

	result := pkg.Package{
		Name:         classifier.Package,
		Version:      version,
		Language:     language,
		Locations:    source.NewLocationSet(reader.Location),
		Type:         pkgType,
		CPEs:         versionedCPEs,
		MetadataType: pkg.BinaryMetadataType,
		Metadata: pkg.BinaryMetadata{
			Classifier:  classifier.Class,
			RealPath:    reader.RealPath,
			VirtualPath: reader.VirtualPath,
		},
	}

	if hasPURL := !reflect.DeepEqual(classifier.PURL, emptyPURL); hasPURL {
		versionedPURL := classifier.PURL
		versionedPURL.Version = version
		result.PURL = versionedPURL.ToString()
	}

	// the ID is assigned last, after every other field has been populated
	result.SetID()

	return []pkg.Package{result}
}
// getContents reads and returns the entire contents behind the given reader, going through a
// union reader obtained from the underlying ReadCloser.
func getContents(reader source.LocationReadCloser) ([]byte, error) {
	ur, urErr := unionreader.GetUnionReader(reader.ReadCloser)
	if urErr != nil {
		return nil, fmt.Errorf("unable to get union reader for file: %w", urErr)
	}

	// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
	data, readErr := io.ReadAll(ur)
	if readErr != nil {
		return nil, fmt.Errorf("unable to get contents for file: %w", readErr)
	}

	return data, nil
}
// singleCPE wraps the CPE parsed from the given string in a one-element slice.
// The CPE is parsed with pkg.MustCPE.
func singleCPE(cpe string) []pkg.CPE {
	parsed := pkg.MustCPE(cpe)
	return []pkg.CPE{parsed}
}

View File

@ -1,7 +1,6 @@
package generic
package binary
import (
"regexp"
"testing"
"github.com/stretchr/testify/require"
@ -14,35 +13,27 @@ func Test_ClassifierCPEs(t *testing.T) {
tests := []struct {
name string
fixture string
classifier Classifier
classifier classifier
cpes []string
}{
{
name: "no CPEs",
fixture: "test-fixtures/version.txt",
classifier: Classifier{
Package: "some-app",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(".*/version.txt"),
},
EvidencePatterns: []*regexp.Regexp{
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
},
CPEs: []pkg.CPE{},
classifier: classifier{
Package: "some-app",
FileGlob: ".*/version.txt",
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
CPEs: []pkg.CPE{},
},
cpes: nil,
},
{
name: "one CPE",
fixture: "test-fixtures/version.txt",
classifier: Classifier{
Package: "some-app",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(".*/version.txt"),
},
EvidencePatterns: []*regexp.Regexp{
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
},
classifier: classifier{
Package: "some-app",
FileGlob: ".*/version.txt",
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
CPEs: []pkg.CPE{
pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
},
@ -54,14 +45,10 @@ func Test_ClassifierCPEs(t *testing.T) {
{
name: "multiple CPEs",
fixture: "test-fixtures/version.txt",
classifier: Classifier{
Package: "some-app",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(".*/version.txt"),
},
EvidencePatterns: []*regexp.Regexp{
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
},
classifier: classifier{
Package: "some-app",
FileGlob: ".*/version.txt",
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
CPEs: []pkg.CPE{
pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
pkg.MustCPE("cpe:2.3:a:some:apps:*:*:*:*:*:*:*:*"),
@ -83,9 +70,13 @@ func Test_ClassifierCPEs(t *testing.T) {
location := locations[0]
readCloser, err := resolver.FileContentsByLocation(location)
require.NoError(t, err)
p, _, err := test.classifier.Examine(source.NewLocationReadCloser(location, readCloser))
pkgs, err := test.classifier.EvidenceMatcher(test.classifier, source.NewLocationReadCloser(location, readCloser))
require.NoError(t, err)
require.Len(t, pkgs, 1)
p := pkgs[0]
var cpes []string
for _, c := range p.CPEs {
cpes = append(cpes, pkg.CPEString(c))

View File

@ -0,0 +1,60 @@
package binary
import "github.com/anchore/syft/syft/pkg"
// defaultClassifiers is the list of classifiers applied by Cataloger.Catalog. Each entry selects
// candidate files with FileGlob and extracts a version with its EvidenceMatcher; Package is the
// name given to the resulting package.
var defaultClassifiers = []classifier{
{
Class: "python-binary",
FileGlob: "**/python*",
// the major.minor version captured from the file name is rendered into the content pattern
EvidenceMatcher: fileNameTemplateVersionMatcher(
`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`,
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
Package: "python",
},
{
Class: "python-binary-lib",
FileGlob: "**/libpython*.so*",
EvidenceMatcher: fileNameTemplateVersionMatcher(
`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`,
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
Package: "python",
},
{
Class: "cpython-source",
FileGlob: "**/patchlevel.h",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`),
Package: "python",
},
{
Class: "go-binary",
FileGlob: "**/go",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`),
Package: "go",
},
{
Class: "nodejs-binary",
FileGlob: "**/node",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
Package: "node.js",
Language: pkg.JavaScript,
// the version of the PURL and of the CPE is replaced with the detected version by singlePackage
PURL: mustPURL("pkg:generic/node@version"),
CPEs: singleCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
},
{
Class: "go-binary-hint",
FileGlob: "**/VERSION",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`),
Package: "go",
},
{
Class: "busybox-binary",
FileGlob: "**/busybox",
EvidenceMatcher: fileContentsVersionMatcher(
`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
Package: "busybox",
},
}

View File

@ -12,6 +12,7 @@ import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/cpp"
"github.com/anchore/syft/syft/pkg/cataloger/dart"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
@ -40,7 +41,6 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerInstalledCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmDBCataloger(),
java.NewJavaCataloger(cfg.Java()),
@ -49,6 +49,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
dotnet.NewDotnetDepsCataloger(),
portage.NewPortageCataloger(),
sbom.NewSBOMCataloger(),
binary.NewCataloger(),
}, cfg.Catalogers)
}
@ -61,7 +62,6 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerLockCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmDBCataloger(),
rpm.NewFileCataloger(),
@ -78,6 +78,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
portage.NewPortageCataloger(),
haskell.NewHackageCataloger(),
sbom.NewSBOMCataloger(),
binary.NewCataloger(),
}, cfg.Catalogers)
}
@ -91,7 +92,6 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmDBCataloger(),
rpm.NewFileCataloger(),
@ -111,6 +111,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
portage.NewPortageCataloger(),
haskell.NewHackageCataloger(),
sbom.NewSBOMCataloger(),
binary.NewCataloger(),
}, cfg.Catalogers)
}

View File

@ -1,100 +0,0 @@
package generic
import (
"fmt"
"io"
"path"
"regexp"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
"github.com/anchore/syft/syft/source"
)
// Classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the EvidencePatterns.
type Classifier struct {
// Package is the name identifying the package this classifier detects.
Package string
// FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given
// source location. If any of the patterns match, the file will be considered a candidate for parsing.
// If no patterns are provided, the reader is automatically considered a candidate.
FilepathPatterns []*regexp.Regexp
// EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a
// given file in the source location. If any of the patterns match, the file will be considered a candidate for parsing.
EvidencePatterns []*regexp.Regexp
// CPEs are the specific CPEs we want to include for this binary with updated version information
CPEs []pkg.CPE
}
// Examine checks whether the file behind reader is an instance of the package described by this
// classifier.
//
// When FilepathPatterns are configured the location must match at least one of them, otherwise an
// error is returned. The file contents are then read into memory and tested against each of the
// EvidencePatterns in order. The first pattern that matches and provides a "version" named capture
// group produces the returned package; patterns that match without a version are logged at debug
// level and skipped.
//
// Results:
//   - a package and a nil error when a version was extracted;
//   - a nil package and a nil error when no evidence pattern yielded a version;
//   - a nil package and a non-nil error when the path did not match or the contents could not be read.
//
// The relationship result is always nil.
func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) {
	if len(c.FilepathPatterns) > 0 {
		if matched, _ := file.FilepathMatches(c.FilepathPatterns, reader.Location); !matched {
			return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package)
		}
	}

	contents, err := getContents(reader)
	if err != nil {
		return nil, nil, fmt.Errorf("unable to get read contents for file: %w", err)
	}

	for _, evidencePattern := range c.EvidencePatterns {
		if !evidencePattern.Match(contents) {
			continue
		}

		matchMetadata := internal.MatchNamedCaptureGroups(evidencePattern, string(contents))
		version, ok := matchMetadata["version"]
		if !ok {
			log.Debugf("no version found in binary from pattern %v", evidencePattern)
			continue
		}

		// stamp the detected version onto every configured CPE; the slice is left nil when no CPEs
		// are configured
		var cpes []pkg.CPE
		for _, cpe := range c.CPEs {
			cpe.Version = version
			cpes = append(cpes, cpe)
		}

		// the first pattern that yields a version wins
		return &pkg.Package{
			Name:         path.Base(reader.VirtualPath),
			Version:      version,
			Language:     pkg.Binary,
			Locations:    source.NewLocationSet(reader.Location),
			Type:         pkg.BinaryPkg,
			CPEs:         cpes,
			MetadataType: pkg.BinaryMetadataType,
			Metadata: pkg.BinaryMetadata{
				Classifier:  c.Package,
				RealPath:    reader.RealPath,
				VirtualPath: reader.VirtualPath,
			},
		}, nil, nil
	}

	return nil, nil, nil
}
// getContents returns the complete contents of the file behind reader. The underlying ReadCloser is
// first adapted with unionreader.GetUnionReader and then drained with io.ReadAll; a failure in either
// step is wrapped and returned with nil contents.
func getContents(reader source.LocationReadCloser) ([]byte, error) {
	ur, urErr := unionreader.GetUnionReader(reader.ReadCloser)
	if urErr != nil {
		return nil, fmt.Errorf("unable to get union reader for file: %w", urErr)
	}

	data, readErr := io.ReadAll(ur)
	if readErr != nil {
		return nil, fmt.Errorf("unable to get contents for file: %w", readErr)
	}

	return data, nil
}

View File

@ -4,7 +4,6 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e
package javascript
import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
@ -20,8 +19,3 @@ func NewJavascriptLockCataloger() *generic.Cataloger {
WithParserByGlobs(parseYarnLock, "**/yarn.lock").
WithParserByGlobs(parsePnpmLock, "**/pnpm-lock.yaml")
}
// NewNodeBinaryCataloger returns a generic cataloger named "node-binary-cataloger" that registers
// parseNodeBinary as the parser for the MIME types listed in internal.ExecutableMIMETypeSet.
func NewNodeBinaryCataloger() *generic.Cataloger {
	cataloger := generic.NewCataloger("node-binary-cataloger")
	executableMIMETypes := internal.ExecutableMIMETypeSet.List()
	return cataloger.WithParserByMimeTypes(parseNodeBinary, executableMIMETypes...)
}

View File

@ -1,43 +0,0 @@
package javascript
import (
"regexp"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
// nodeClassifier describes how to recognize a node.js binary: the file path must be exactly "node"
// or end in "/node", and the file contents must contain a "node.js/vX.Y.Z" marker from which the
// version is captured.
var nodeClassifier = generic.Classifier{
Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching
FilepathPatterns: []*regexp.Regexp{
// note: should we just parse all files resolved with executable mimetypes?
// regexp that matches a path that is "node" or whose last segment is "node"
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatterns: []*regexp.Regexp{
// regex that matches node.js/vx.y.z and captures x.y.z as the "version" group
regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
},
CPEs: []pkg.CPE{
// CPE with a wildcard version field
pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
},
}
// parseNodeBinary runs nodeClassifier against the given file and, when a package is found, returns it
// re-labelled as a JavaScript package with its ID set.
//
// A classifier error is logged at trace level and otherwise ignored, so this parser never returns a
// non-nil error; a file that is not a node.js binary yields no packages. No relationships are
// returned.
func parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	p, _, err := nodeClassifier.Examine(reader)
	if err != nil {
		// Tracef (not Trace) so that the %+v verb is actually expanded with the error
		log.Tracef("failed to find node.js package: %+v", err)
		return nil, nil, nil // we can silently fail here to reduce warning noise
	}

	if p == nil {
		return nil, nil, nil
	}

	// TODO add node specific metadata to the packages to help with vulnerability matching
	p.Language = pkg.JavaScript
	p.SetID()
	return []pkg.Package{*p}, nil, nil
}

View File

@ -0,0 +1,7 @@
package pkg
// FileMetadata holds a classifier name together with the real path and virtual path of a file. The
// struct tags fix the field names used for mapstructure decoding and JSON encoding.
// NOTE(review): presumably the metadata attached to packages discovered by file classification —
// confirm against callers.
type FileMetadata struct {
Classifier string `mapstructure:"Classifier" json:"classifier"`
RealPath string `mapstructure:"RealPath" json:"realPath"`
VirtualPath string `mapstructure:"VirtualPath" json:"virtualPath"`
}

View File

@ -25,6 +25,7 @@ const (
CPP Language = "c++"
Haskell Language = "haskell"
Binary Language = "binary"
File Language = "file"
)
// AllLanguages is a set of all programming languages detected by syft.

View File

@ -18,13 +18,12 @@ type SBOM struct {
}
// Artifacts groups the cataloging results carried by an SBOM: the package catalog, the detected linux
// distribution, and per-file results (metadata, digests, contents, secrets) keyed by source.Coordinates.
type Artifacts struct {
PackageCatalog *pkg.Catalog
FileMetadata map[source.Coordinates]source.FileMetadata
FileDigests map[source.Coordinates][]file.Digest
FileClassifications map[source.Coordinates][]file.Classification
FileContents map[source.Coordinates]string
Secrets map[source.Coordinates][]file.SearchResult
LinuxDistribution *linux.Release
PackageCatalog *pkg.Catalog
FileMetadata map[source.Coordinates]source.FileMetadata
FileDigests map[source.Coordinates][]file.Digest
FileContents map[source.Coordinates]string
Secrets map[source.Coordinates][]file.SearchResult
LinuxDistribution *linux.Release
}
type Descriptor struct {
@ -55,9 +54,6 @@ func (s SBOM) AllCoordinates() []source.Coordinates {
for coordinates := range s.Artifacts.FileContents {
set.Add(coordinates)
}
for coordinates := range s.Artifacts.FileClassifications {
set.Add(coordinates)
}
for coordinates := range s.Artifacts.FileDigests {
set.Add(coordinates)
}

View File

@ -105,7 +105,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "squashed-scope-flag-hidden-packages",
args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage},
assertions: []traitAssertion{
assertPackageCount(162),
assertPackageCount(163),
assertNotInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
},
@ -114,7 +114,7 @@ func TestPackagesCmdFlags(t *testing.T) {
name: "all-layers-scope-flag",
args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage},
assertions: []traitAssertion{
assertPackageCount(163), // packages are now deduplicated for this case
assertPackageCount(164), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,
@ -127,7 +127,7 @@ func TestPackagesCmdFlags(t *testing.T) {
"SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers",
},
assertions: []traitAssertion{
assertPackageCount(163), // packages are now deduplicated for this case
assertPackageCount(164), // packages are now deduplicated for this case
assertInOutput("all-layers"),
assertInOutput("vsftpd"), // hidden package
assertSuccessfulReturnCode,

View File

@ -9,6 +9,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
@ -21,7 +22,7 @@ func TestPackageDeduplication(t *testing.T) {
}{
{
scope: source.AllLayersScope,
packageCount: 172, // without deduplication this would be 618
packageCount: 173, // without deduplication this would be 618
instanceCount: map[string]int{
"basesystem": 1,
"wget": 1,
@ -40,7 +41,7 @@ func TestPackageDeduplication(t *testing.T) {
},
{
scope: source.SquashedScope,
packageCount: 170,
packageCount: 171,
instanceCount: map[string]int{
"basesystem": 1,
"wget": 1,
@ -62,6 +63,12 @@ func TestPackageDeduplication(t *testing.T) {
t.Run(string(tt.scope), func(t *testing.T) {
sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil)
for _, p := range sbom.Artifacts.PackageCatalog.Sorted() {
if p.Type == pkg.BinaryPkg {
assert.NotEmpty(t, p.Name)
}
}
assert.Equal(t, tt.packageCount, sbom.Artifacts.PackageCatalog.PackageCount())
for name, expectedInstanceCount := range tt.instanceCount {
pkgs := sbom.Artifacts.PackageCatalog.PackagesByName(name)