mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
feat: Generic Binary Cataloger (#1336)
This commit is contained in:
parent
7a69e2129b
commit
4f39287216
@ -22,7 +22,6 @@ func Tasks(app *config.Application) ([]Task, error) {
|
||||
generateCatalogFileMetadataTask,
|
||||
generateCatalogFileDigestsTask,
|
||||
generateCatalogSecretsTask,
|
||||
generateCatalogFileClassificationsTask,
|
||||
generateCatalogContentsTask,
|
||||
}
|
||||
|
||||
@ -162,34 +161,6 @@ func generateCatalogSecretsTask(app *config.Application) (Task, error) {
|
||||
return task, nil
|
||||
}
|
||||
|
||||
func generateCatalogFileClassificationsTask(app *config.Application) (Task, error) {
|
||||
if !app.FileClassification.Cataloger.Enabled {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// TODO: in the future we could expose out the classifiers via configuration
|
||||
classifierCataloger, err := file.NewClassificationCataloger(file.DefaultClassifiers)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
|
||||
resolver, err := src.FileResolver(app.FileClassification.Cataloger.ScopeOpt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
result, err := classifierCataloger.Catalog(resolver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
results.FileClassifications = result
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return task, nil
|
||||
}
|
||||
|
||||
func generateCatalogContentsTask(app *config.Application) (Task, error) {
|
||||
if !app.FileContents.Cataloger.Enabled {
|
||||
return nil, nil
|
||||
|
||||
@ -1,38 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
type ClassificationCataloger struct {
|
||||
classifiers []Classifier
|
||||
}
|
||||
|
||||
func NewClassificationCataloger(classifiers []Classifier) (*ClassificationCataloger, error) {
|
||||
return &ClassificationCataloger{
|
||||
classifiers: classifiers,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]Classification, error) {
|
||||
results := make(map[source.Coordinates][]Classification)
|
||||
|
||||
numResults := 0
|
||||
for _, location := range allRegularFiles(resolver) {
|
||||
for _, classifier := range i.classifiers {
|
||||
result, err := classifier.Classify(resolver, location)
|
||||
if err != nil {
|
||||
log.Warnf("file classification cataloger failed with class=%q at location=%+v: %+v", classifier.Class, location, err)
|
||||
continue
|
||||
}
|
||||
if result != nil {
|
||||
results[location.Coordinates] = append(results[location.Coordinates], *result)
|
||||
numResults++
|
||||
}
|
||||
}
|
||||
}
|
||||
log.Debugf("file classifier discovered %d results", numResults)
|
||||
|
||||
return results, nil
|
||||
}
|
||||
@ -1,210 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/imagetest"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixtureDir string
|
||||
location string
|
||||
expected []Classification
|
||||
expectedErr func(assert.TestingT, error, ...interface{}) bool
|
||||
}{
|
||||
{
|
||||
name: "positive-libpython3.7.so",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "libpython3.7.so",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "python-binary",
|
||||
Metadata: map[string]string{
|
||||
"version": "3.7.4a-vZ9",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "positive-python3.6",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "python3.6",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "python-binary",
|
||||
Metadata: map[string]string{
|
||||
"version": "3.6.3a-vZ9",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "positive-patchlevel.h",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "patchlevel.h",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "cpython-source",
|
||||
Metadata: map[string]string{
|
||||
"version": "3.9-aZ5",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "positive-go",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "go",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "go-binary",
|
||||
Metadata: map[string]string{
|
||||
"version": "1.14",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "positive-go-hint",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "VERSION",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "go-binary-hint",
|
||||
Metadata: map[string]string{
|
||||
"version": "1.15",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
{
|
||||
name: "positive-busybox",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
location: "[", // note: busybox is a link to [
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "busybox-binary",
|
||||
VirtualPath: "busybox",
|
||||
Metadata: map[string]string{
|
||||
"version": "3.33.3",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
c, err := NewClassificationCataloger(DefaultClassifiers)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
src, err := source.NewFromDirectory(test.fixtureDir)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
actualResults, err := c.Catalog(resolver)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
ok := false
|
||||
for actualLoc, actualClassification := range actualResults {
|
||||
if test.location == actualLoc.RealPath {
|
||||
ok = true
|
||||
assert.Equal(t, test.expected, actualClassification)
|
||||
}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("could not find test location=%q", test.location)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixtureImage string
|
||||
location string
|
||||
expected []Classification
|
||||
expectedErr func(assert.TestingT, error, ...interface{}) bool
|
||||
}{
|
||||
{
|
||||
name: "busybox-regression",
|
||||
fixtureImage: "image-busybox",
|
||||
location: "/bin/[",
|
||||
expected: []Classification{
|
||||
{
|
||||
Class: "busybox-binary",
|
||||
VirtualPath: "/bin/busybox",
|
||||
Metadata: map[string]string{
|
||||
"version": "1.35.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedErr: assert.NoError,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
c, err := NewClassificationCataloger(DefaultClassifiers)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage)
|
||||
src, err := source.NewFromImage(img, "test-img")
|
||||
test.expectedErr(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
actualResults, err := c.Catalog(resolver)
|
||||
test.expectedErr(t, err)
|
||||
|
||||
ok := false
|
||||
for actuaLoc, actualClassification := range actualResults {
|
||||
if actuaLoc.RealPath == test.location {
|
||||
ok = true
|
||||
assert.Equal(t, test.expected, actualClassification)
|
||||
}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("could not find test location=%q", test.location)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
|
||||
|
||||
c, err := NewClassificationCataloger(DefaultClassifiers)
|
||||
assert.NoError(t, err)
|
||||
|
||||
src, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
|
||||
assert.NoError(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
assert.NoError(t, err)
|
||||
|
||||
actualResults, err := c.Catalog(resolver)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, len(actualResults))
|
||||
|
||||
}
|
||||
@ -1,153 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"text/template"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
var DefaultClassifiers = []Classifier{
|
||||
{
|
||||
Class: "python-binary",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`),
|
||||
regexp.MustCompile(`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Class: "cpython-source",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)patchlevel.h$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Class: "go-binary",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)go$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Class: "nodejs-binary",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)node$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
// regex that matches node.js/vx.y.z
|
||||
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Class: "go-binary-hint",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)VERSION$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Class: "busybox-binary",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(.*/|^)busybox$`),
|
||||
},
|
||||
EvidencePatternTemplates: []string{
|
||||
`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Classifier describes how to recognize one class of file: first by path,
// then by evidence found in the file contents.
type Classifier struct {
	// Class is the label recorded on every Classification this classifier produces.
	Class string
	// FilepathPatterns select candidate files; named capture groups from the
	// first matching pattern are made available to the evidence templates.
	FilepathPatterns []*regexp.Regexp
	// EvidencePatternTemplates are text/template strings that render to
	// regular expressions matched against the file contents.
	EvidencePatternTemplates []string
}
|
||||
|
||||
// Classification is the outcome of a Classifier matching a single file.
type Classification struct {
	// Class is copied from the Classifier that produced the match.
	Class string `json:"class"`
	// VirtualPath is the virtual path of the location that was classified.
	VirtualPath string `json:"virtual_path"`
	// Metadata holds the named capture groups extracted from the file contents.
	Metadata map[string]string `json:"metadata"`
}
|
||||
|
||||
func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
|
||||
doesFilepathMatch, filepathNamedGroupValues := FilepathMatches(c.FilepathPatterns, location)
|
||||
if !doesFilepathMatch {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
contentReader, err := resolver.FileContentsByLocation(location)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer internal.CloseAndLogError(contentReader, location.VirtualPath)
|
||||
|
||||
// TODO: there is room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
|
||||
contents, err := io.ReadAll(contentReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var result *Classification
|
||||
for _, patternTemplate := range c.EvidencePatternTemplates {
|
||||
tmpl, err := template.New("").Parse(patternTemplate)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse classifier template=%q : %w", patternTemplate, err)
|
||||
}
|
||||
|
||||
patternBuf := &bytes.Buffer{}
|
||||
err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to render template: %w", err)
|
||||
}
|
||||
|
||||
pattern, err := regexp.Compile(patternBuf.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
|
||||
}
|
||||
|
||||
if !pattern.Match(contents) {
|
||||
continue
|
||||
}
|
||||
|
||||
matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
|
||||
if result == nil {
|
||||
result = &Classification{
|
||||
Class: c.Class,
|
||||
VirtualPath: location.VirtualPath,
|
||||
Metadata: matchMetadata,
|
||||
}
|
||||
} else {
|
||||
for key, value := range matchMetadata {
|
||||
result.Metadata[key] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
|
||||
for _, path := range []string{location.RealPath, location.VirtualPath} {
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
for _, pattern := range patterns {
|
||||
if pattern.MatchString(path) {
|
||||
return true, internal.MatchNamedCaptureGroups(pattern, path)
|
||||
}
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
@ -1,97 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
func TestFilepathMatches(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
location source.Location
|
||||
patterns []string
|
||||
expectedMatches bool
|
||||
expectedNamedGroups map[string]string
|
||||
}{
|
||||
{
|
||||
name: "simple-filename-match",
|
||||
location: source.Location{
|
||||
Coordinates: source.Coordinates{
|
||||
RealPath: "python2.7",
|
||||
},
|
||||
},
|
||||
patterns: []string{
|
||||
`python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: true,
|
||||
},
|
||||
{
|
||||
name: "filepath-match",
|
||||
location: source.Location{
|
||||
Coordinates: source.Coordinates{
|
||||
RealPath: "/usr/bin/python2.7",
|
||||
},
|
||||
},
|
||||
patterns: []string{
|
||||
`python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: true,
|
||||
},
|
||||
{
|
||||
name: "virtual-filepath-match",
|
||||
location: source.Location{
|
||||
VirtualPath: "/usr/bin/python2.7",
|
||||
},
|
||||
patterns: []string{
|
||||
`python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: true,
|
||||
},
|
||||
{
|
||||
name: "full-filepath-match",
|
||||
location: source.Location{
|
||||
VirtualPath: "/usr/bin/python2.7",
|
||||
},
|
||||
patterns: []string{
|
||||
`.*/bin/python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: true,
|
||||
},
|
||||
{
|
||||
name: "anchored-filename-match-FAILS",
|
||||
location: source.Location{
|
||||
Coordinates: source.Coordinates{
|
||||
RealPath: "/usr/bin/python2.7",
|
||||
},
|
||||
},
|
||||
patterns: []string{
|
||||
`^python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: false,
|
||||
},
|
||||
{
|
||||
name: "empty-filename-match-FAILS",
|
||||
location: source.Location{},
|
||||
patterns: []string{
|
||||
`^python([0-9]+\.[0-9]+)$`,
|
||||
},
|
||||
expectedMatches: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
var patterns []*regexp.Regexp
|
||||
for _, p := range test.patterns {
|
||||
patterns = append(patterns, regexp.MustCompile(p))
|
||||
}
|
||||
actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location)
|
||||
assert.Equal(t, test.expectedMatches, actualMatches)
|
||||
assert.Equal(t, test.expectedNamedGroups, actualNamedGroups)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -6,12 +6,11 @@ import (
|
||||
)
|
||||
|
||||
type File struct {
|
||||
ID string `json:"id"`
|
||||
Location source.Coordinates `json:"location"`
|
||||
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
|
||||
Contents string `json:"contents,omitempty"`
|
||||
Digests []file.Digest `json:"digests,omitempty"`
|
||||
Classifications []file.Classification `json:"classifications,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Location source.Coordinates `json:"location"`
|
||||
Metadata *FileMetadataEntry `json:"metadata,omitempty"`
|
||||
Contents string `json:"contents,omitempty"`
|
||||
Digests []file.Digest `json:"digests,omitempty"`
|
||||
}
|
||||
|
||||
type FileMetadataEntry struct {
|
||||
|
||||
@ -102,23 +102,17 @@ func toFile(s sbom.SBOM) []model.File {
|
||||
digests = digestsForLocation
|
||||
}
|
||||
|
||||
var classifications []file.Classification
|
||||
if classificationsForLocation, exists := artifacts.FileClassifications[coordinates]; exists {
|
||||
classifications = classificationsForLocation
|
||||
}
|
||||
|
||||
var contents string
|
||||
if contentsForLocation, exists := artifacts.FileContents[coordinates]; exists {
|
||||
contents = contentsForLocation
|
||||
}
|
||||
|
||||
results = append(results, model.File{
|
||||
ID: string(coordinates.ID()),
|
||||
Location: coordinates,
|
||||
Metadata: toFileMetadataEntry(coordinates, metadata),
|
||||
Digests: digests,
|
||||
Classifications: classifications,
|
||||
Contents: contents,
|
||||
ID: string(coordinates.ID()),
|
||||
Location: coordinates,
|
||||
Metadata: toFileMetadataEntry(coordinates, metadata),
|
||||
Digests: digests,
|
||||
Contents: contents,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
78
syft/pkg/cataloger/binary/cataloger.go
Normal file
78
syft/pkg/cataloger/binary/cataloger.go
Normal file
@ -0,0 +1,78 @@
|
||||
package binary
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
const catalogerName = "binary-cataloger"
|
||||
|
||||
func NewCataloger() *Cataloger {
|
||||
return &Cataloger{}
|
||||
}
|
||||
|
||||
// Cataloger surfaces evidence of a deliberately small set of binary files,
// namely those recognized by the classifiers. It is _NOT_ meant to catalog
// every binary it encounters: the set is curated and is mostly made up of
// toolchain-related runtimes such as Python, Go, Java, or Node, with
// exceptions for widely-used binaries such as busybox.
type Cataloger struct{}
|
||||
|
||||
// Name returns a string that uniquely describes the Cataloger
|
||||
func (c Cataloger) Name() string {
|
||||
return catalogerName
|
||||
}
|
||||
|
||||
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
|
||||
// after analyzing the catalog source.
|
||||
func (c Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
var packages []pkg.Package
|
||||
var relationships []artifact.Relationship
|
||||
|
||||
for _, classifier := range defaultClassifiers {
|
||||
locations, err := resolver.FilesByGlob(classifier.FileGlob)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
for _, location := range locations {
|
||||
reader, err := resolver.FileContentsByLocation(location)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
locationReader := source.NewLocationReadCloser(location, reader)
|
||||
newPkgs, err := classifier.EvidenceMatcher(classifier, locationReader)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
newPackages:
|
||||
for i := range newPkgs {
|
||||
newPkg := &newPkgs[i]
|
||||
for j := range packages {
|
||||
p := &packages[j]
|
||||
// consolidate identical packages found in different locations,
|
||||
// but continue to track each location
|
||||
if packagesMatch(p, newPkg) {
|
||||
p.Locations.Add(newPkg.Locations.ToSlice()...)
|
||||
continue newPackages
|
||||
}
|
||||
}
|
||||
packages = append(packages, *newPkg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return packages, relationships, nil
|
||||
}
|
||||
|
||||
// packagesMatch returns true if the binary packages "match" based on basic criteria
|
||||
func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool {
|
||||
if p1.Name != p2.Name ||
|
||||
p1.Version != p2.Version ||
|
||||
p1.Language != p2.Language ||
|
||||
p1.Type != p2.Type {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
201
syft/pkg/cataloger/binary/cataloger_test.go
Normal file
201
syft/pkg/cataloger/binary/cataloger_test.go
Normal file
@ -0,0 +1,201 @@
|
||||
package binary
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/imagetest"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixtureDir string
|
||||
expected pkg.Package
|
||||
}{
|
||||
{
|
||||
name: "positive-libpython3.7.so",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "python",
|
||||
Version: "3.7.4a-vZ9",
|
||||
Locations: singleLocation("libpython3.7.so"),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "python-binary-lib",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "positive-python3.6",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "python",
|
||||
Version: "3.6.3a-vZ9",
|
||||
Locations: singleLocation("python3.6"),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "python-binary",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "positive-patchlevel.h",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "python",
|
||||
Version: "3.9-aZ5",
|
||||
Locations: singleLocation("patchlevel.h"),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "cpython-source",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "positive-go",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "go",
|
||||
Version: "1.14",
|
||||
Locations: singleLocation("go"),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "go-binary",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "positive-go-hint",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "go",
|
||||
Version: "1.15",
|
||||
Locations: singleLocation("VERSION"),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "go-binary-hint",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "positive-busybox",
|
||||
fixtureDir: "test-fixtures/classifiers/positive",
|
||||
expected: pkg.Package{
|
||||
Name: "busybox",
|
||||
Version: "3.33.3",
|
||||
Locations: singleLocation("["), // note: busybox is a link to [
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "busybox-binary",
|
||||
VirtualPath: "busybox",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
c := NewCataloger()
|
||||
|
||||
src, err := source.NewFromDirectory(test.fixtureDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
require.NoError(t, err)
|
||||
|
||||
packages, _, err := c.Catalog(resolver)
|
||||
require.NoError(t, err)
|
||||
|
||||
ok := false
|
||||
for _, p := range packages {
|
||||
if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath {
|
||||
ok = true
|
||||
assertPackagesAreEqual(t, test.expected, p)
|
||||
}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixtureImage string
|
||||
expected pkg.Package
|
||||
}{
|
||||
{
|
||||
name: "busybox-regression",
|
||||
fixtureImage: "image-busybox",
|
||||
expected: pkg.Package{
|
||||
Name: "busybox",
|
||||
Version: "1.35.0",
|
||||
Locations: singleLocation("/bin/["),
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: "busybox-binary",
|
||||
VirtualPath: "/bin/busybox",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
c := NewCataloger()
|
||||
|
||||
img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage)
|
||||
src, err := source.NewFromImage(img, "test-img")
|
||||
require.NoError(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
require.NoError(t, err)
|
||||
|
||||
packages, _, err := c.Catalog(resolver)
|
||||
require.NoError(t, err)
|
||||
|
||||
ok := false
|
||||
for _, p := range packages {
|
||||
if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath {
|
||||
ok = true
|
||||
assertPackagesAreEqual(t, test.expected, p)
|
||||
}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
|
||||
c := NewCataloger()
|
||||
|
||||
src, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
|
||||
assert.NoError(t, err)
|
||||
|
||||
resolver, err := src.FileResolver(source.SquashedScope)
|
||||
assert.NoError(t, err)
|
||||
|
||||
actualResults, _, err := c.Catalog(resolver)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, 0, len(actualResults))
|
||||
}
|
||||
|
||||
func singleLocation(s string) source.LocationSet {
|
||||
return source.NewLocationSet(source.NewLocation(s))
|
||||
}
|
||||
|
||||
func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) {
|
||||
meta1 := expected.Metadata.(pkg.BinaryMetadata)
|
||||
meta2 := p.Metadata.(pkg.BinaryMetadata)
|
||||
if expected.Name != p.Name ||
|
||||
expected.Version != p.Version ||
|
||||
meta1.Classifier != meta2.Classifier {
|
||||
assert.Failf(t, "packages not equal", "%v != %v", expected, p)
|
||||
}
|
||||
}
|
||||
180
syft/pkg/cataloger/binary/classifier.go
Normal file
180
syft/pkg/cataloger/binary/classifier.go
Normal file
@ -0,0 +1,180 @@
|
||||
package binary
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"text/template"
|
||||
|
||||
"github.com/anchore/packageurl-go"
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
var emptyPURL = packageurl.PackageURL{}
|
||||
|
||||
// classifier is a generic package classifier that can be used to match a package definition
|
||||
// to a file that meets the given content criteria of the evidenceMatcher.
|
||||
type classifier struct {
|
||||
Class string
|
||||
|
||||
// FileGlob is a selector to narrow down file inspection using the **/glob* syntax
|
||||
FileGlob string
|
||||
|
||||
// EvidenceMatcher is what will be used to match against the file in the source
|
||||
// location. If the matcher returns a package, the file will be considered a candidate.
|
||||
EvidenceMatcher evidenceMatcher
|
||||
|
||||
// Information below is used to specify the Package information when returned
|
||||
|
||||
// Package is the name to use for the package
|
||||
Package string
|
||||
|
||||
// Language is the language to classify this package as
|
||||
Language pkg.Language
|
||||
|
||||
// Type is the package type to use for the package
|
||||
Type pkg.Type
|
||||
|
||||
// PURL is the Package URL to use when generating a package
|
||||
PURL packageurl.PackageURL
|
||||
|
||||
// CPEs are the specific CPEs we want to include for this binary with updated version information
|
||||
CPEs []pkg.CPE
|
||||
}
|
||||
|
||||
// evidenceMatcher is a function called to catalog Packages that match some sort of evidence
|
||||
type evidenceMatcher func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error)
|
||||
|
||||
func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher {
|
||||
pat := regexp.MustCompile(fileNamePattern)
|
||||
return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
|
||||
if !pat.MatchString(reader.RealPath) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, reader.RealPath)
|
||||
|
||||
tmpl, err := template.New("").Parse(contentTemplate)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err)
|
||||
}
|
||||
|
||||
patternBuf := &bytes.Buffer{}
|
||||
err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to render template: %w", err)
|
||||
}
|
||||
|
||||
tmplPattern, err := regexp.Compile(patternBuf.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
|
||||
}
|
||||
|
||||
contents, err := getContents(reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
|
||||
}
|
||||
|
||||
matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents))
|
||||
|
||||
version, ok := matchMetadata["version"]
|
||||
if ok {
|
||||
return singlePackage(classifier, reader, version), nil
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
func fileContentsVersionMatcher(pattern string) evidenceMatcher {
|
||||
pat := regexp.MustCompile(pattern)
|
||||
return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) {
|
||||
contents, err := getContents(reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get read contents for file: %w", err)
|
||||
}
|
||||
|
||||
matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents))
|
||||
version, ok := matchMetadata["version"]
|
||||
if ok {
|
||||
return singlePackage(classifier, reader, version), nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
func mustPURL(purl string) packageurl.PackageURL {
|
||||
p, err := packageurl.FromString(purl)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("invalid PURL: %s", p))
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func singlePackage(classifier classifier, reader source.LocationReadCloser, version string) []pkg.Package {
|
||||
var cpes []pkg.CPE
|
||||
for _, cpe := range classifier.CPEs {
|
||||
cpe.Version = version
|
||||
cpes = append(cpes, cpe)
|
||||
}
|
||||
|
||||
p := pkg.Package{
|
||||
Name: classifier.Package,
|
||||
Version: version,
|
||||
Language: pkg.Binary,
|
||||
Locations: source.NewLocationSet(reader.Location),
|
||||
Type: pkg.BinaryPkg,
|
||||
CPEs: cpes,
|
||||
MetadataType: pkg.BinaryMetadataType,
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: classifier.Class,
|
||||
RealPath: reader.RealPath,
|
||||
VirtualPath: reader.VirtualPath,
|
||||
},
|
||||
}
|
||||
|
||||
if classifier.Type != "" {
|
||||
p.Type = classifier.Type
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(classifier.PURL, emptyPURL) {
|
||||
purl := classifier.PURL
|
||||
purl.Version = version
|
||||
p.PURL = purl.ToString()
|
||||
}
|
||||
|
||||
if classifier.Language != "" {
|
||||
p.Language = classifier.Language
|
||||
}
|
||||
|
||||
p.SetID()
|
||||
|
||||
return []pkg.Package{p}
|
||||
}
|
||||
|
||||
func getContents(reader source.LocationReadCloser) ([]byte, error) {
|
||||
unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get union reader for file: %w", err)
|
||||
}
|
||||
|
||||
// TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
|
||||
contents, err := io.ReadAll(unionReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get contents for file: %w", err)
|
||||
}
|
||||
|
||||
return contents, nil
|
||||
}
|
||||
|
||||
// singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid
|
||||
func singleCPE(cpe string) []pkg.CPE {
|
||||
return []pkg.CPE{
|
||||
pkg.MustCPE(cpe),
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,6 @@
|
||||
package generic
|
||||
package binary
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -14,35 +13,27 @@ func Test_ClassifierCPEs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixture string
|
||||
classifier Classifier
|
||||
classifier classifier
|
||||
cpes []string
|
||||
}{
|
||||
{
|
||||
name: "no CPEs",
|
||||
fixture: "test-fixtures/version.txt",
|
||||
classifier: Classifier{
|
||||
Package: "some-app",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(".*/version.txt"),
|
||||
},
|
||||
EvidencePatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
},
|
||||
CPEs: []pkg.CPE{},
|
||||
classifier: classifier{
|
||||
Package: "some-app",
|
||||
FileGlob: ".*/version.txt",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
CPEs: []pkg.CPE{},
|
||||
},
|
||||
cpes: nil,
|
||||
},
|
||||
{
|
||||
name: "one CPE",
|
||||
fixture: "test-fixtures/version.txt",
|
||||
classifier: Classifier{
|
||||
Package: "some-app",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(".*/version.txt"),
|
||||
},
|
||||
EvidencePatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
},
|
||||
classifier: classifier{
|
||||
Package: "some-app",
|
||||
FileGlob: ".*/version.txt",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
CPEs: []pkg.CPE{
|
||||
pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
|
||||
},
|
||||
@ -54,14 +45,10 @@ func Test_ClassifierCPEs(t *testing.T) {
|
||||
{
|
||||
name: "multiple CPEs",
|
||||
fixture: "test-fixtures/version.txt",
|
||||
classifier: Classifier{
|
||||
Package: "some-app",
|
||||
FilepathPatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(".*/version.txt"),
|
||||
},
|
||||
EvidencePatterns: []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
},
|
||||
classifier: classifier{
|
||||
Package: "some-app",
|
||||
FileGlob: ".*/version.txt",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
|
||||
CPEs: []pkg.CPE{
|
||||
pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
|
||||
pkg.MustCPE("cpe:2.3:a:some:apps:*:*:*:*:*:*:*:*"),
|
||||
@ -83,9 +70,13 @@ func Test_ClassifierCPEs(t *testing.T) {
|
||||
location := locations[0]
|
||||
readCloser, err := resolver.FileContentsByLocation(location)
|
||||
require.NoError(t, err)
|
||||
p, _, err := test.classifier.Examine(source.NewLocationReadCloser(location, readCloser))
|
||||
pkgs, err := test.classifier.EvidenceMatcher(test.classifier, source.NewLocationReadCloser(location, readCloser))
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, pkgs, 1)
|
||||
|
||||
p := pkgs[0]
|
||||
|
||||
var cpes []string
|
||||
for _, c := range p.CPEs {
|
||||
cpes = append(cpes, pkg.CPEString(c))
|
||||
60
syft/pkg/cataloger/binary/default_classifiers.go
Normal file
60
syft/pkg/cataloger/binary/default_classifiers.go
Normal file
@ -0,0 +1,60 @@
|
||||
package binary
|
||||
|
||||
import "github.com/anchore/syft/syft/pkg"
|
||||
|
||||
var defaultClassifiers = []classifier{
|
||||
{
|
||||
Class: "python-binary",
|
||||
FileGlob: "**/python*",
|
||||
EvidenceMatcher: fileNameTemplateVersionMatcher(
|
||||
`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`,
|
||||
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
|
||||
Package: "python",
|
||||
},
|
||||
{
|
||||
Class: "python-binary-lib",
|
||||
FileGlob: "**/libpython*.so*",
|
||||
EvidenceMatcher: fileNameTemplateVersionMatcher(
|
||||
`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`,
|
||||
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`),
|
||||
Package: "python",
|
||||
},
|
||||
{
|
||||
Class: "cpython-source",
|
||||
FileGlob: "**/patchlevel.h",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(
|
||||
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`),
|
||||
Package: "python",
|
||||
},
|
||||
{
|
||||
Class: "go-binary",
|
||||
FileGlob: "**/go",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(
|
||||
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`),
|
||||
Package: "go",
|
||||
},
|
||||
{
|
||||
Class: "nodejs-binary",
|
||||
FileGlob: "**/node",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(
|
||||
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
|
||||
Package: "node.js",
|
||||
Language: pkg.JavaScript,
|
||||
PURL: mustPURL("pkg:generic/node@version"),
|
||||
CPEs: singleCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
|
||||
},
|
||||
{
|
||||
Class: "go-binary-hint",
|
||||
FileGlob: "**/VERSION",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(
|
||||
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`),
|
||||
Package: "go",
|
||||
},
|
||||
{
|
||||
Class: "busybox-binary",
|
||||
FileGlob: "**/busybox",
|
||||
EvidenceMatcher: fileContentsVersionMatcher(
|
||||
`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
|
||||
Package: "busybox",
|
||||
},
|
||||
}
|
||||
@ -12,6 +12,7 @@ import (
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/cpp"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/dart"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/deb"
|
||||
@ -40,7 +41,6 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
|
||||
python.NewPythonPackageCataloger(),
|
||||
php.NewPHPComposerInstalledCataloger(),
|
||||
javascript.NewJavascriptPackageCataloger(),
|
||||
javascript.NewNodeBinaryCataloger(),
|
||||
deb.NewDpkgdbCataloger(),
|
||||
rpm.NewRpmDBCataloger(),
|
||||
java.NewJavaCataloger(cfg.Java()),
|
||||
@ -49,6 +49,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
|
||||
dotnet.NewDotnetDepsCataloger(),
|
||||
portage.NewPortageCataloger(),
|
||||
sbom.NewSBOMCataloger(),
|
||||
binary.NewCataloger(),
|
||||
}, cfg.Catalogers)
|
||||
}
|
||||
|
||||
@ -61,7 +62,6 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
|
||||
python.NewPythonPackageCataloger(),
|
||||
php.NewPHPComposerLockCataloger(),
|
||||
javascript.NewJavascriptLockCataloger(),
|
||||
javascript.NewNodeBinaryCataloger(),
|
||||
deb.NewDpkgdbCataloger(),
|
||||
rpm.NewRpmDBCataloger(),
|
||||
rpm.NewFileCataloger(),
|
||||
@ -78,6 +78,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
|
||||
portage.NewPortageCataloger(),
|
||||
haskell.NewHackageCataloger(),
|
||||
sbom.NewSBOMCataloger(),
|
||||
binary.NewCataloger(),
|
||||
}, cfg.Catalogers)
|
||||
}
|
||||
|
||||
@ -91,7 +92,6 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
|
||||
python.NewPythonPackageCataloger(),
|
||||
javascript.NewJavascriptLockCataloger(),
|
||||
javascript.NewJavascriptPackageCataloger(),
|
||||
javascript.NewNodeBinaryCataloger(),
|
||||
deb.NewDpkgdbCataloger(),
|
||||
rpm.NewRpmDBCataloger(),
|
||||
rpm.NewFileCataloger(),
|
||||
@ -111,6 +111,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
|
||||
portage.NewPortageCataloger(),
|
||||
haskell.NewHackageCataloger(),
|
||||
sbom.NewSBOMCataloger(),
|
||||
binary.NewCataloger(),
|
||||
}, cfg.Catalogers)
|
||||
}
|
||||
|
||||
|
||||
@ -1,100 +0,0 @@
|
||||
package generic
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"path"
|
||||
"regexp"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
// Classifier is a generic package classifier that can be used to match a package definition
// to a file whose path matches one of the FilepathPatterns and whose contents match one of
// the EvidencePatterns.
type Classifier struct {
	// Package identifies the package this classifier looks for. It is used in
	// error messages and recorded as the classifier name in the resulting
	// package metadata.
	Package string
	// FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given
	// source location. If any of the patterns match, the file will be considered a candidate for parsing.
	// If no patterns are provided, the reader is automatically considered a candidate.
	FilepathPatterns []*regexp.Regexp
	// EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a
	// given file in the source location. Patterns are tried in order; the first one that matches and yields a
	// "version" named capture group determines the package version.
	EvidencePatterns []*regexp.Regexp
	// CPEs are the specific CPEs we want to include for this binary with updated version information
	CPEs []pkg.CPE
}
|
||||
|
||||
func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) {
|
||||
doesFilepathMatch := true
|
||||
if len(c.FilepathPatterns) > 0 {
|
||||
doesFilepathMatch, _ = file.FilepathMatches(c.FilepathPatterns, reader.Location)
|
||||
}
|
||||
|
||||
if !doesFilepathMatch {
|
||||
return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package)
|
||||
}
|
||||
|
||||
contents, err := getContents(reader)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to get read contents for file: %w", err)
|
||||
}
|
||||
|
||||
var classifiedPackage *pkg.Package
|
||||
for _, evidencePattern := range c.EvidencePatterns {
|
||||
if !evidencePattern.Match(contents) {
|
||||
continue
|
||||
}
|
||||
|
||||
matchMetadata := internal.MatchNamedCaptureGroups(evidencePattern, string(contents))
|
||||
version, ok := matchMetadata["version"]
|
||||
if !ok {
|
||||
log.Debugf("no version found in binary from pattern %v", evidencePattern)
|
||||
continue
|
||||
}
|
||||
|
||||
var cpes []pkg.CPE
|
||||
for _, cpe := range c.CPEs {
|
||||
cpe.Version = version
|
||||
if err == nil {
|
||||
cpes = append(cpes, cpe)
|
||||
}
|
||||
}
|
||||
|
||||
classifiedPackage = &pkg.Package{
|
||||
Name: path.Base(reader.VirtualPath),
|
||||
Version: version,
|
||||
Language: pkg.Binary,
|
||||
Locations: source.NewLocationSet(reader.Location),
|
||||
Type: pkg.BinaryPkg,
|
||||
CPEs: cpes,
|
||||
MetadataType: pkg.BinaryMetadataType,
|
||||
Metadata: pkg.BinaryMetadata{
|
||||
Classifier: c.Package,
|
||||
RealPath: reader.RealPath,
|
||||
VirtualPath: reader.VirtualPath,
|
||||
},
|
||||
}
|
||||
break
|
||||
}
|
||||
return classifiedPackage, nil, nil
|
||||
}
|
||||
|
||||
func getContents(reader source.LocationReadCloser) ([]byte, error) {
|
||||
unionReader, err := unionreader.GetUnionReader(reader.ReadCloser)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get union reader for file: %w", err)
|
||||
}
|
||||
|
||||
contents, err := io.ReadAll(unionReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get contents for file: %w", err)
|
||||
}
|
||||
|
||||
return contents, nil
|
||||
}
|
||||
@ -4,7 +4,6 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e
|
||||
package javascript
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
@ -20,8 +19,3 @@ func NewJavascriptLockCataloger() *generic.Cataloger {
|
||||
WithParserByGlobs(parseYarnLock, "**/yarn.lock").
|
||||
WithParserByGlobs(parsePnpmLock, "**/pnpm-lock.yaml")
|
||||
}
|
||||
|
||||
// NewNodeBinaryCataloger returns a generic cataloger named
// "node-binary-cataloger" that runs parseNodeBinary against every file whose
// MIME type is in internal.ExecutableMIMETypeSet.
func NewNodeBinaryCataloger() *generic.Cataloger {
	return generic.NewCataloger("node-binary-cataloger").
		WithParserByMimeTypes(parseNodeBinary, internal.ExecutableMIMETypeSet.List()...)
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
package javascript
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
// nodeClassifier describes how a node.js binary is recognized: by a file path
// ending in "node" and by a "node.js/vX.Y.Z" string in the file contents, from
// which the version is captured.
var nodeClassifier = generic.Classifier{
	Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching
	FilepathPatterns: []*regexp.Regexp{
		// note: should we just parse all files resolved with executable mimetypes
		// regexp that matches a path whose final element is exactly "node"
		regexp.MustCompile(`(.*/|^)node$`),
	},
	EvidencePatterns: []*regexp.Regexp{
		// regex that matches node.js/vx.y.z and captures x.y.z as "version"
		regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
	},
	// CPE template for node.js; the version component is left as a wildcard here
	CPEs: []pkg.CPE{
		pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
	},
}
|
||||
|
||||
func parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
p, _, err := nodeClassifier.Examine(reader)
|
||||
if err != nil {
|
||||
log.Trace("failed to find node.js package: %+v", err)
|
||||
return nil, nil, nil // we can silently fail here to reduce warning noise
|
||||
}
|
||||
|
||||
// TODO add node specific metadata to the packages to help with vulnerability matching
|
||||
if p != nil {
|
||||
p.Language = pkg.JavaScript
|
||||
p.SetID()
|
||||
return []pkg.Package{*p}, nil, nil
|
||||
}
|
||||
return nil, nil, nil
|
||||
}
|
||||
7
syft/pkg/file_metadata.go
Normal file
7
syft/pkg/file_metadata.go
Normal file
@ -0,0 +1,7 @@
|
||||
package pkg
|
||||
|
||||
// FileMetadata is package metadata that records which classifier produced a
// package and the path of the file it was found in. It carries the same three
// fields as the binary package metadata.
type FileMetadata struct {
	// Classifier is the name of the classifier that matched the file.
	Classifier string `mapstructure:"Classifier" json:"classifier"`
	// RealPath is the file's real path — NOTE(review): presumably the resolved path within the source; confirm.
	RealPath string `mapstructure:"RealPath" json:"realPath"`
	// VirtualPath is the file's virtual path — NOTE(review): presumably the path used to reach the file (e.g. via links); confirm.
	VirtualPath string `mapstructure:"VirtualPath" json:"virtualPath"`
}
|
||||
@ -25,6 +25,7 @@ const (
|
||||
CPP Language = "c++"
|
||||
Haskell Language = "haskell"
|
||||
Binary Language = "binary"
|
||||
File Language = "file"
|
||||
)
|
||||
|
||||
// AllLanguages is a set of all programming languages detected by syft.
|
||||
|
||||
@ -18,13 +18,12 @@ type SBOM struct {
|
||||
}
|
||||
|
||||
type Artifacts struct {
|
||||
PackageCatalog *pkg.Catalog
|
||||
FileMetadata map[source.Coordinates]source.FileMetadata
|
||||
FileDigests map[source.Coordinates][]file.Digest
|
||||
FileClassifications map[source.Coordinates][]file.Classification
|
||||
FileContents map[source.Coordinates]string
|
||||
Secrets map[source.Coordinates][]file.SearchResult
|
||||
LinuxDistribution *linux.Release
|
||||
PackageCatalog *pkg.Catalog
|
||||
FileMetadata map[source.Coordinates]source.FileMetadata
|
||||
FileDigests map[source.Coordinates][]file.Digest
|
||||
FileContents map[source.Coordinates]string
|
||||
Secrets map[source.Coordinates][]file.SearchResult
|
||||
LinuxDistribution *linux.Release
|
||||
}
|
||||
|
||||
type Descriptor struct {
|
||||
@ -55,9 +54,6 @@ func (s SBOM) AllCoordinates() []source.Coordinates {
|
||||
for coordinates := range s.Artifacts.FileContents {
|
||||
set.Add(coordinates)
|
||||
}
|
||||
for coordinates := range s.Artifacts.FileClassifications {
|
||||
set.Add(coordinates)
|
||||
}
|
||||
for coordinates := range s.Artifacts.FileDigests {
|
||||
set.Add(coordinates)
|
||||
}
|
||||
|
||||
@ -105,7 +105,7 @@ func TestPackagesCmdFlags(t *testing.T) {
|
||||
name: "squashed-scope-flag-hidden-packages",
|
||||
args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage},
|
||||
assertions: []traitAssertion{
|
||||
assertPackageCount(162),
|
||||
assertPackageCount(163),
|
||||
assertNotInOutput("vsftpd"), // hidden package
|
||||
assertSuccessfulReturnCode,
|
||||
},
|
||||
@ -114,7 +114,7 @@ func TestPackagesCmdFlags(t *testing.T) {
|
||||
name: "all-layers-scope-flag",
|
||||
args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage},
|
||||
assertions: []traitAssertion{
|
||||
assertPackageCount(163), // packages are now deduplicated for this case
|
||||
assertPackageCount(164), // packages are now deduplicated for this case
|
||||
assertInOutput("all-layers"),
|
||||
assertInOutput("vsftpd"), // hidden package
|
||||
assertSuccessfulReturnCode,
|
||||
@ -127,7 +127,7 @@ func TestPackagesCmdFlags(t *testing.T) {
|
||||
"SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers",
|
||||
},
|
||||
assertions: []traitAssertion{
|
||||
assertPackageCount(163), // packages are now deduplicated for this case
|
||||
assertPackageCount(164), // packages are now deduplicated for this case
|
||||
assertInOutput("all-layers"),
|
||||
assertInOutput("vsftpd"), // hidden package
|
||||
assertSuccessfulReturnCode,
|
||||
|
||||
@ -9,6 +9,7 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
@ -21,7 +22,7 @@ func TestPackageDeduplication(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
scope: source.AllLayersScope,
|
||||
packageCount: 172, // without deduplication this would be 618
|
||||
packageCount: 173, // without deduplication this would be 618
|
||||
instanceCount: map[string]int{
|
||||
"basesystem": 1,
|
||||
"wget": 1,
|
||||
@ -40,7 +41,7 @@ func TestPackageDeduplication(t *testing.T) {
|
||||
},
|
||||
{
|
||||
scope: source.SquashedScope,
|
||||
packageCount: 170,
|
||||
packageCount: 171,
|
||||
instanceCount: map[string]int{
|
||||
"basesystem": 1,
|
||||
"wget": 1,
|
||||
@ -62,6 +63,12 @@ func TestPackageDeduplication(t *testing.T) {
|
||||
t.Run(string(tt.scope), func(t *testing.T) {
|
||||
sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil)
|
||||
|
||||
for _, p := range sbom.Artifacts.PackageCatalog.Sorted() {
|
||||
if p.Type == pkg.BinaryPkg {
|
||||
assert.NotEmpty(t, p.Name)
|
||||
}
|
||||
}
|
||||
|
||||
assert.Equal(t, tt.packageCount, sbom.Artifacts.PackageCatalog.PackageCount())
|
||||
for name, expectedInstanceCount := range tt.instanceCount {
|
||||
pkgs := sbom.Artifacts.PackageCatalog.PackagesByName(name)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user