From 4f392872162555c86a65efdc0fef345ab5d1fa16 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Tue, 29 Nov 2022 18:28:10 -0500 Subject: [PATCH] feat: Generic Binary Cataloger (#1336) --- cmd/syft/cli/eventloop/tasks.go | 29 --- syft/file/classification_cataloger.go | 38 ---- syft/file/classification_cataloger_test.go | 210 ------------------ syft/file/classifier.go | 153 ------------- syft/file/classifier_test.go | 97 -------- syft/formats/syftjson/model/file.go | 11 +- syft/formats/syftjson/to_format_model.go | 16 +- syft/pkg/cataloger/binary/cataloger.go | 78 +++++++ syft/pkg/cataloger/binary/cataloger_test.go | 201 +++++++++++++++++ syft/pkg/cataloger/binary/classifier.go | 180 +++++++++++++++ .../{generic => binary}/classifier_test.go | 49 ++-- .../cataloger/binary/default_classifiers.go | 60 +++++ .../classifiers/negative/.gitignore | 0 .../classifiers/negative/busybox | 0 .../test-fixtures/classifiers/negative/go | 0 .../classifiers/negative/libpython2.7.so | 0 .../classifiers/negative/python2.6 | 0 .../classifiers/positive/.gitignore | 0 .../classifiers/positive/VERSION | 0 .../test-fixtures/classifiers/positive/[ | 0 .../classifiers/positive/busybox | 0 .../test-fixtures/classifiers/positive/go | 0 .../classifiers/positive/libpython3.7.so | 0 .../classifiers/positive/patchlevel.h | 0 .../classifiers/positive/python3.6 | 0 .../test-fixtures/image-busybox/Dockerfile | 0 .../test-fixtures/version.txt | 0 syft/pkg/cataloger/cataloger.go | 7 +- syft/pkg/cataloger/generic/classifier.go | 100 --------- syft/pkg/cataloger/javascript/cataloger.go | 6 - .../cataloger/javascript/parse_node_binary.go | 43 ---- syft/pkg/file_metadata.go | 7 + syft/pkg/language.go | 1 + syft/sbom/sbom.go | 16 +- test/cli/packages_cmd_test.go | 6 +- .../integration/package_deduplication_test.go | 11 +- 36 files changed, 579 insertions(+), 740 deletions(-) delete mode 100644 syft/file/classification_cataloger.go delete mode 100644 syft/file/classification_cataloger_test.go delete mode 
100644 syft/file/classifier.go delete mode 100644 syft/file/classifier_test.go create mode 100644 syft/pkg/cataloger/binary/cataloger.go create mode 100644 syft/pkg/cataloger/binary/cataloger_test.go create mode 100644 syft/pkg/cataloger/binary/classifier.go rename syft/pkg/cataloger/{generic => binary}/classifier_test.go (61%) create mode 100644 syft/pkg/cataloger/binary/default_classifiers.go rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/negative/.gitignore (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/negative/busybox (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/negative/go (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/negative/libpython2.7.so (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/negative/python2.6 (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/.gitignore (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/VERSION (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/[ (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/busybox (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/go (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/libpython3.7.so (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/patchlevel.h (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/classifiers/positive/python3.6 (100%) rename syft/{file => pkg/cataloger/binary}/test-fixtures/image-busybox/Dockerfile (100%) rename syft/pkg/cataloger/{generic => binary}/test-fixtures/version.txt (100%) delete mode 100644 syft/pkg/cataloger/generic/classifier.go delete mode 100644 syft/pkg/cataloger/javascript/parse_node_binary.go create mode 100644 syft/pkg/file_metadata.go diff --git 
a/cmd/syft/cli/eventloop/tasks.go b/cmd/syft/cli/eventloop/tasks.go index 4d16f64f6..5e32b0423 100644 --- a/cmd/syft/cli/eventloop/tasks.go +++ b/cmd/syft/cli/eventloop/tasks.go @@ -22,7 +22,6 @@ func Tasks(app *config.Application) ([]Task, error) { generateCatalogFileMetadataTask, generateCatalogFileDigestsTask, generateCatalogSecretsTask, - generateCatalogFileClassificationsTask, generateCatalogContentsTask, } @@ -162,34 +161,6 @@ func generateCatalogSecretsTask(app *config.Application) (Task, error) { return task, nil } -func generateCatalogFileClassificationsTask(app *config.Application) (Task, error) { - if !app.FileClassification.Cataloger.Enabled { - return nil, nil - } - - // TODO: in the future we could expose out the classifiers via configuration - classifierCataloger, err := file.NewClassificationCataloger(file.DefaultClassifiers) - if err != nil { - return nil, err - } - - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(app.FileClassification.Cataloger.ScopeOpt) - if err != nil { - return nil, err - } - - result, err := classifierCataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileClassifications = result - return nil, nil - } - - return task, nil -} - func generateCatalogContentsTask(app *config.Application) (Task, error) { if !app.FileContents.Cataloger.Enabled { return nil, nil diff --git a/syft/file/classification_cataloger.go b/syft/file/classification_cataloger.go deleted file mode 100644 index 01a0685ef..000000000 --- a/syft/file/classification_cataloger.go +++ /dev/null @@ -1,38 +0,0 @@ -package file - -import ( - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/source" -) - -type ClassificationCataloger struct { - classifiers []Classifier -} - -func NewClassificationCataloger(classifiers []Classifier) (*ClassificationCataloger, error) { - return &ClassificationCataloger{ - classifiers: classifiers, - }, nil -} - 
-func (i *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]Classification, error) { - results := make(map[source.Coordinates][]Classification) - - numResults := 0 - for _, location := range allRegularFiles(resolver) { - for _, classifier := range i.classifiers { - result, err := classifier.Classify(resolver, location) - if err != nil { - log.Warnf("file classification cataloger failed with class=%q at location=%+v: %+v", classifier.Class, location, err) - continue - } - if result != nil { - results[location.Coordinates] = append(results[location.Coordinates], *result) - numResults++ - } - } - } - log.Debugf("file classifier discovered %d results", numResults) - - return results, nil -} diff --git a/syft/file/classification_cataloger_test.go b/syft/file/classification_cataloger_test.go deleted file mode 100644 index 405166225..000000000 --- a/syft/file/classification_cataloger_test.go +++ /dev/null @@ -1,210 +0,0 @@ -package file - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/anchore/stereoscope/pkg/imagetest" - "github.com/anchore/syft/syft/source" -) - -func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { - tests := []struct { - name string - fixtureDir string - location string - expected []Classification - expectedErr func(assert.TestingT, error, ...interface{}) bool - }{ - { - name: "positive-libpython3.7.so", - fixtureDir: "test-fixtures/classifiers/positive", - location: "libpython3.7.so", - expected: []Classification{ - { - Class: "python-binary", - Metadata: map[string]string{ - "version": "3.7.4a-vZ9", - }, - }, - }, - expectedErr: assert.NoError, - }, - { - name: "positive-python3.6", - fixtureDir: "test-fixtures/classifiers/positive", - location: "python3.6", - expected: []Classification{ - { - Class: "python-binary", - Metadata: map[string]string{ - "version": "3.6.3a-vZ9", - }, - }, - }, - expectedErr: assert.NoError, - }, - { - name: 
"positive-patchlevel.h", - fixtureDir: "test-fixtures/classifiers/positive", - location: "patchlevel.h", - expected: []Classification{ - { - Class: "cpython-source", - Metadata: map[string]string{ - "version": "3.9-aZ5", - }, - }, - }, - expectedErr: assert.NoError, - }, - { - name: "positive-go", - fixtureDir: "test-fixtures/classifiers/positive", - location: "go", - expected: []Classification{ - { - Class: "go-binary", - Metadata: map[string]string{ - "version": "1.14", - }, - }, - }, - expectedErr: assert.NoError, - }, - { - name: "positive-go-hint", - fixtureDir: "test-fixtures/classifiers/positive", - location: "VERSION", - expected: []Classification{ - { - Class: "go-binary-hint", - Metadata: map[string]string{ - "version": "1.15", - }, - }, - }, - expectedErr: assert.NoError, - }, - { - name: "positive-busybox", - fixtureDir: "test-fixtures/classifiers/positive", - location: "[", // note: busybox is a link to [ - expected: []Classification{ - { - Class: "busybox-binary", - VirtualPath: "busybox", - Metadata: map[string]string{ - "version": "3.33.3", - }, - }, - }, - expectedErr: assert.NoError, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - - c, err := NewClassificationCataloger(DefaultClassifiers) - test.expectedErr(t, err) - - src, err := source.NewFromDirectory(test.fixtureDir) - test.expectedErr(t, err) - - resolver, err := src.FileResolver(source.SquashedScope) - test.expectedErr(t, err) - - actualResults, err := c.Catalog(resolver) - test.expectedErr(t, err) - - ok := false - for actualLoc, actualClassification := range actualResults { - if test.location == actualLoc.RealPath { - ok = true - assert.Equal(t, test.expected, actualClassification) - } - } - - if !ok { - t.Fatalf("could not find test location=%q", test.location) - } - - }) - } -} - -func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { - tests := []struct { - name string - fixtureImage string - location string - expected 
[]Classification - expectedErr func(assert.TestingT, error, ...interface{}) bool - }{ - { - name: "busybox-regression", - fixtureImage: "image-busybox", - location: "/bin/[", - expected: []Classification{ - { - Class: "busybox-binary", - VirtualPath: "/bin/busybox", - Metadata: map[string]string{ - "version": "1.35.0", - }, - }, - }, - expectedErr: assert.NoError, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - - c, err := NewClassificationCataloger(DefaultClassifiers) - test.expectedErr(t, err) - - img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage) - src, err := source.NewFromImage(img, "test-img") - test.expectedErr(t, err) - - resolver, err := src.FileResolver(source.SquashedScope) - test.expectedErr(t, err) - - actualResults, err := c.Catalog(resolver) - test.expectedErr(t, err) - - ok := false - for actuaLoc, actualClassification := range actualResults { - if actuaLoc.RealPath == test.location { - ok = true - assert.Equal(t, test.expected, actualClassification) - } - } - - if !ok { - t.Fatalf("could not find test location=%q", test.location) - } - - }) - } -} - -func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) { - - c, err := NewClassificationCataloger(DefaultClassifiers) - assert.NoError(t, err) - - src, err := source.NewFromDirectory("test-fixtures/classifiers/negative") - assert.NoError(t, err) - - resolver, err := src.FileResolver(source.SquashedScope) - assert.NoError(t, err) - - actualResults, err := c.Catalog(resolver) - assert.NoError(t, err) - assert.Equal(t, 0, len(actualResults)) - -} diff --git a/syft/file/classifier.go b/syft/file/classifier.go deleted file mode 100644 index e71a5fa02..000000000 --- a/syft/file/classifier.go +++ /dev/null @@ -1,153 +0,0 @@ -package file - -import ( - "bytes" - "fmt" - "io" - "regexp" - "text/template" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/syft/source" -) - -var DefaultClassifiers = []Classifier{ - { - 
Class: "python-binary", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)python(?P[0-9]+\.[0-9]+)$`), - regexp.MustCompile(`(.*/|^)libpython(?P[0-9]+\.[0-9]+).so.*$`), - }, - EvidencePatternTemplates: []string{ - `(?m)(?P{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`, - }, - }, - { - Class: "cpython-source", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)patchlevel.h$`), - }, - EvidencePatternTemplates: []string{ - `(?m)#define\s+PY_VERSION\s+"?(?P[0-9\.\-_a-zA-Z]+)"?`, - }, - }, - { - Class: "go-binary", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)go$`), - }, - EvidencePatternTemplates: []string{ - `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`, - }, - }, - { - Class: "nodejs-binary", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)node$`), - }, - EvidencePatternTemplates: []string{ - // regex that matches node.js/vx.y.z - `(?m)node\.js\/v(?P[0-9]+\.[0-9]+\.[0-9]+)`, - }, - }, - { - Class: "go-binary-hint", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)VERSION$`), - }, - EvidencePatternTemplates: []string{ - `(?m)go(?P[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`, - }, - }, - { - Class: "busybox-binary", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(`(.*/|^)busybox$`), - }, - EvidencePatternTemplates: []string{ - `(?m)BusyBox\s+v(?P[0-9]+\.[0-9]+\.[0-9]+)`, - }, - }, -} - -type Classifier struct { - Class string - FilepathPatterns []*regexp.Regexp - EvidencePatternTemplates []string -} - -type Classification struct { - Class string `json:"class"` - VirtualPath string `json:"virtual_path"` - Metadata map[string]string `json:"metadata"` -} - -func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) { - doesFilepathMatch, filepathNamedGroupValues := FilepathMatches(c.FilepathPatterns, location) - if !doesFilepathMatch { - return nil, nil - } - - contentReader, err := 
resolver.FileContentsByLocation(location) - if err != nil { - return nil, err - } - defer internal.CloseAndLogError(contentReader, location.VirtualPath) - - // TODO: there is room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. - contents, err := io.ReadAll(contentReader) - if err != nil { - return nil, err - } - - var result *Classification - for _, patternTemplate := range c.EvidencePatternTemplates { - tmpl, err := template.New("").Parse(patternTemplate) - if err != nil { - return nil, fmt.Errorf("unable to parse classifier template=%q : %w", patternTemplate, err) - } - - patternBuf := &bytes.Buffer{} - err = tmpl.Execute(patternBuf, filepathNamedGroupValues) - if err != nil { - return nil, fmt.Errorf("unable to render template: %w", err) - } - - pattern, err := regexp.Compile(patternBuf.String()) - if err != nil { - return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) - } - - if !pattern.Match(contents) { - continue - } - - matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents)) - if result == nil { - result = &Classification{ - Class: c.Class, - VirtualPath: location.VirtualPath, - Metadata: matchMetadata, - } - } else { - for key, value := range matchMetadata { - result.Metadata[key] = value - } - } - } - return result, nil -} - -func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) { - for _, path := range []string{location.RealPath, location.VirtualPath} { - if path == "" { - continue - } - for _, pattern := range patterns { - if pattern.MatchString(path) { - return true, internal.MatchNamedCaptureGroups(pattern, path) - } - } - } - return false, nil -} diff --git a/syft/file/classifier_test.go b/syft/file/classifier_test.go deleted file mode 100644 index 4421bc349..000000000 --- a/syft/file/classifier_test.go +++ /dev/null @@ -1,97 +0,0 @@ -package file - -import ( - "regexp" - 
"testing" - - "github.com/stretchr/testify/assert" - - "github.com/anchore/syft/syft/source" -) - -func TestFilepathMatches(t *testing.T) { - tests := []struct { - name string - location source.Location - patterns []string - expectedMatches bool - expectedNamedGroups map[string]string - }{ - { - name: "simple-filename-match", - location: source.Location{ - Coordinates: source.Coordinates{ - RealPath: "python2.7", - }, - }, - patterns: []string{ - `python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: true, - }, - { - name: "filepath-match", - location: source.Location{ - Coordinates: source.Coordinates{ - RealPath: "/usr/bin/python2.7", - }, - }, - patterns: []string{ - `python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: true, - }, - { - name: "virtual-filepath-match", - location: source.Location{ - VirtualPath: "/usr/bin/python2.7", - }, - patterns: []string{ - `python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: true, - }, - { - name: "full-filepath-match", - location: source.Location{ - VirtualPath: "/usr/bin/python2.7", - }, - patterns: []string{ - `.*/bin/python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: true, - }, - { - name: "anchored-filename-match-FAILS", - location: source.Location{ - Coordinates: source.Coordinates{ - RealPath: "/usr/bin/python2.7", - }, - }, - patterns: []string{ - `^python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: false, - }, - { - name: "empty-filename-match-FAILS", - location: source.Location{}, - patterns: []string{ - `^python([0-9]+\.[0-9]+)$`, - }, - expectedMatches: false, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - var patterns []*regexp.Regexp - for _, p := range test.patterns { - patterns = append(patterns, regexp.MustCompile(p)) - } - actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location) - assert.Equal(t, test.expectedMatches, actualMatches) - assert.Equal(t, test.expectedNamedGroups, actualNamedGroups) - }) - } -} diff --git a/syft/formats/syftjson/model/file.go 
b/syft/formats/syftjson/model/file.go index 10e1249db..e230ef127 100644 --- a/syft/formats/syftjson/model/file.go +++ b/syft/formats/syftjson/model/file.go @@ -6,12 +6,11 @@ import ( ) type File struct { - ID string `json:"id"` - Location source.Coordinates `json:"location"` - Metadata *FileMetadataEntry `json:"metadata,omitempty"` - Contents string `json:"contents,omitempty"` - Digests []file.Digest `json:"digests,omitempty"` - Classifications []file.Classification `json:"classifications,omitempty"` + ID string `json:"id"` + Location source.Coordinates `json:"location"` + Metadata *FileMetadataEntry `json:"metadata,omitempty"` + Contents string `json:"contents,omitempty"` + Digests []file.Digest `json:"digests,omitempty"` } type FileMetadataEntry struct { diff --git a/syft/formats/syftjson/to_format_model.go b/syft/formats/syftjson/to_format_model.go index 12638f78e..51d8f87df 100644 --- a/syft/formats/syftjson/to_format_model.go +++ b/syft/formats/syftjson/to_format_model.go @@ -102,23 +102,17 @@ func toFile(s sbom.SBOM) []model.File { digests = digestsForLocation } - var classifications []file.Classification - if classificationsForLocation, exists := artifacts.FileClassifications[coordinates]; exists { - classifications = classificationsForLocation - } - var contents string if contentsForLocation, exists := artifacts.FileContents[coordinates]; exists { contents = contentsForLocation } results = append(results, model.File{ - ID: string(coordinates.ID()), - Location: coordinates, - Metadata: toFileMetadataEntry(coordinates, metadata), - Digests: digests, - Classifications: classifications, - Contents: contents, + ID: string(coordinates.ID()), + Location: coordinates, + Metadata: toFileMetadataEntry(coordinates, metadata), + Digests: digests, + Contents: contents, }) } diff --git a/syft/pkg/cataloger/binary/cataloger.go b/syft/pkg/cataloger/binary/cataloger.go new file mode 100644 index 000000000..18b5cdd89 --- /dev/null +++ b/syft/pkg/cataloger/binary/cataloger.go 
@@ -0,0 +1,78 @@ +package binary + +import ( + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +const catalogerName = "binary-cataloger" + +func NewCataloger() *Cataloger { + return &Cataloger{} +} + +// Cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, +// which have been identified by the classifiers. The Cataloger is _NOT_ a place to catalog any and every +// binary, but rather the specific set that has been curated to be important, predominantly related to toolchain- +// related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such +// as busybox. +type Cataloger struct{} + +// Name returns a string that uniquely describes the Cataloger +func (c Cataloger) Name() string { + return catalogerName +} + +// Catalog is given an object to resolve file references and content, this function returns any discovered Packages +// after analyzing the catalog source. 
+func (c Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) { + var packages []pkg.Package + var relationships []artifact.Relationship + + for _, classifier := range defaultClassifiers { + locations, err := resolver.FilesByGlob(classifier.FileGlob) + if err != nil { + return nil, nil, err + } + for _, location := range locations { + reader, err := resolver.FileContentsByLocation(location) + if err != nil { + return nil, nil, err + } + locationReader := source.NewLocationReadCloser(location, reader) + newPkgs, err := classifier.EvidenceMatcher(classifier, locationReader) + if err != nil { + return nil, nil, err + } + newPackages: + for i := range newPkgs { + newPkg := &newPkgs[i] + for j := range packages { + p := &packages[j] + // consolidate identical packages found in different locations, + // but continue to track each location + if packagesMatch(p, newPkg) { + p.Locations.Add(newPkg.Locations.ToSlice()...) + continue newPackages + } + } + packages = append(packages, *newPkg) + } + } + } + + return packages, relationships, nil +} + +// packagesMatch returns true if the binary packages "match" based on basic criteria +func packagesMatch(p1 *pkg.Package, p2 *pkg.Package) bool { + if p1.Name != p2.Name || + p1.Version != p2.Version || + p1.Language != p2.Language || + p1.Type != p2.Type { + return false + } + + return true +} diff --git a/syft/pkg/cataloger/binary/cataloger_test.go b/syft/pkg/cataloger/binary/cataloger_test.go new file mode 100644 index 000000000..808fe1ffe --- /dev/null +++ b/syft/pkg/cataloger/binary/cataloger_test.go @@ -0,0 +1,201 @@ +package binary + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { + tests := []struct { + name string + 
fixtureDir string + expected pkg.Package + }{ + { + name: "positive-libpython3.7.so", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "python", + Version: "3.7.4a-vZ9", + Locations: singleLocation("libpython3.7.so"), + Metadata: pkg.BinaryMetadata{ + Classifier: "python-binary-lib", + }, + }, + }, + { + name: "positive-python3.6", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "python", + Version: "3.6.3a-vZ9", + Locations: singleLocation("python3.6"), + Metadata: pkg.BinaryMetadata{ + Classifier: "python-binary", + }, + }, + }, + { + name: "positive-patchlevel.h", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "python", + Version: "3.9-aZ5", + Locations: singleLocation("patchlevel.h"), + Metadata: pkg.BinaryMetadata{ + Classifier: "cpython-source", + }, + }, + }, + { + name: "positive-go", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "go", + Version: "1.14", + Locations: singleLocation("go"), + Metadata: pkg.BinaryMetadata{ + Classifier: "go-binary", + }, + }, + }, + { + name: "positive-go-hint", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "go", + Version: "1.15", + Locations: singleLocation("VERSION"), + Metadata: pkg.BinaryMetadata{ + Classifier: "go-binary-hint", + }, + }, + }, + { + name: "positive-busybox", + fixtureDir: "test-fixtures/classifiers/positive", + expected: pkg.Package{ + Name: "busybox", + Version: "3.33.3", + Locations: singleLocation("["), // note: busybox is a link to [ + Metadata: pkg.BinaryMetadata{ + Classifier: "busybox-binary", + VirtualPath: "busybox", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := NewCataloger() + + src, err := source.NewFromDirectory(test.fixtureDir) + require.NoError(t, err) + + resolver, err := src.FileResolver(source.SquashedScope) + require.NoError(t, err) + + packages, _, 
err := c.Catalog(resolver) + require.NoError(t, err) + + ok := false + for _, p := range packages { + if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath { + ok = true + assertPackagesAreEqual(t, test.expected, p) + } + } + + if !ok { + t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath) + } + + }) + } +} + +func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { + tests := []struct { + name string + fixtureImage string + expected pkg.Package + }{ + { + name: "busybox-regression", + fixtureImage: "image-busybox", + expected: pkg.Package{ + Name: "busybox", + Version: "1.35.0", + Locations: singleLocation("/bin/["), + Metadata: pkg.BinaryMetadata{ + Classifier: "busybox-binary", + VirtualPath: "/bin/busybox", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := NewCataloger() + + img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage) + src, err := source.NewFromImage(img, "test-img") + require.NoError(t, err) + + resolver, err := src.FileResolver(source.SquashedScope) + require.NoError(t, err) + + packages, _, err := c.Catalog(resolver) + require.NoError(t, err) + + ok := false + for _, p := range packages { + if test.expected.Locations.ToSlice()[0].RealPath == p.Locations.ToSlice()[0].RealPath { + ok = true + assertPackagesAreEqual(t, test.expected, p) + } + } + + if !ok { + t.Fatalf("could not find test location=%q", test.expected.Locations.ToSlice()[0].RealPath) + } + + }) + } +} + +func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) { + c := NewCataloger() + + src, err := source.NewFromDirectory("test-fixtures/classifiers/negative") + assert.NoError(t, err) + + resolver, err := src.FileResolver(source.SquashedScope) + assert.NoError(t, err) + + actualResults, _, err := c.Catalog(resolver) + assert.NoError(t, err) + assert.Equal(t, 0, len(actualResults)) +} + +func 
singleLocation(s string) source.LocationSet { + return source.NewLocationSet(source.NewLocation(s)) +} + +func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) { + meta1 := expected.Metadata.(pkg.BinaryMetadata) + meta2 := p.Metadata.(pkg.BinaryMetadata) + if expected.Name != p.Name || + expected.Version != p.Version || + meta1.Classifier != meta2.Classifier { + assert.Failf(t, "packages not equal", "%v != %v", expected, p) + } +} diff --git a/syft/pkg/cataloger/binary/classifier.go b/syft/pkg/cataloger/binary/classifier.go new file mode 100644 index 000000000..32d2dc1ae --- /dev/null +++ b/syft/pkg/cataloger/binary/classifier.go @@ -0,0 +1,180 @@ +package binary + +import ( + "bytes" + "fmt" + "io" + "reflect" + "regexp" + "text/template" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" + "github.com/anchore/syft/syft/source" +) + +var emptyPURL = packageurl.PackageURL{} + +// classifier is a generic package classifier that can be used to match a package definition +// to a file that meets the given content criteria of the evidenceMatcher. +type classifier struct { + Class string + + // FileGlob is a selector to narrow down file inspection using the **/glob* syntax + FileGlob string + + // EvidenceMatcher is what will be used to match against the file in the source + // location. If the matcher returns a package, the file will be considered a candidate. 
+ EvidenceMatcher evidenceMatcher + + // Information below is used to specify the Package information when returned + + // Package is the name to use for the package + Package string + + // Language is the language to classify this package as + Language pkg.Language + + // Type is the package type to use for the package + Type pkg.Type + + // PURL is the Package URL to use when generating a package + PURL packageurl.PackageURL + + // CPEs are the specific CPEs we want to include for this binary with updated version information + CPEs []pkg.CPE +} + +// evidenceMatcher is a function called to catalog Packages that match some sort of evidence +type evidenceMatcher func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) + +func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate string) evidenceMatcher { + pat := regexp.MustCompile(fileNamePattern) + return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) { + if !pat.MatchString(reader.RealPath) { + return nil, nil + } + + filepathNamedGroupValues := internal.MatchNamedCaptureGroups(pat, reader.RealPath) + + tmpl, err := template.New("").Parse(contentTemplate) + if err != nil { + return nil, fmt.Errorf("unable to parse classifier template=%q : %w", contentTemplate, err) + } + + patternBuf := &bytes.Buffer{} + err = tmpl.Execute(patternBuf, filepathNamedGroupValues) + if err != nil { + return nil, fmt.Errorf("unable to render template: %w", err) + } + + tmplPattern, err := regexp.Compile(patternBuf.String()) + if err != nil { + return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) + } + + contents, err := getContents(reader) + if err != nil { + return nil, fmt.Errorf("unable to get read contents for file: %w", err) + } + + matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) + + version, ok := matchMetadata["version"] + if ok { + return singlePackage(classifier, reader, 
version), nil + } + + return nil, nil + } +} + +func fileContentsVersionMatcher(pattern string) evidenceMatcher { + pat := regexp.MustCompile(pattern) + return func(classifier classifier, reader source.LocationReadCloser) ([]pkg.Package, error) { + contents, err := getContents(reader) + if err != nil { + return nil, fmt.Errorf("unable to get read contents for file: %w", err) + } + + matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) + version, ok := matchMetadata["version"] + if ok { + return singlePackage(classifier, reader, version), nil + } + return nil, nil + } +} + +func mustPURL(purl string) packageurl.PackageURL { + p, err := packageurl.FromString(purl) + if err != nil { + panic(fmt.Sprintf("invalid PURL: %s", p)) + } + return p +} + +func singlePackage(classifier classifier, reader source.LocationReadCloser, version string) []pkg.Package { + var cpes []pkg.CPE + for _, cpe := range classifier.CPEs { + cpe.Version = version + cpes = append(cpes, cpe) + } + + p := pkg.Package{ + Name: classifier.Package, + Version: version, + Language: pkg.Binary, + Locations: source.NewLocationSet(reader.Location), + Type: pkg.BinaryPkg, + CPEs: cpes, + MetadataType: pkg.BinaryMetadataType, + Metadata: pkg.BinaryMetadata{ + Classifier: classifier.Class, + RealPath: reader.RealPath, + VirtualPath: reader.VirtualPath, + }, + } + + if classifier.Type != "" { + p.Type = classifier.Type + } + + if !reflect.DeepEqual(classifier.PURL, emptyPURL) { + purl := classifier.PURL + purl.Version = version + p.PURL = purl.ToString() + } + + if classifier.Language != "" { + p.Language = classifier.Language + } + + p.SetID() + + return []pkg.Package{p} +} + +func getContents(reader source.LocationReadCloser) ([]byte, error) { + unionReader, err := unionreader.GetUnionReader(reader.ReadCloser) + if err != nil { + return nil, fmt.Errorf("unable to get union reader for file: %w", err) + } + + // TODO: there may be room for improvement here, as this may use an excessive 
amount of memory. Alternate approach is to leverage a RuneReader. + contents, err := io.ReadAll(unionReader) + if err != nil { + return nil, fmt.Errorf("unable to get contents for file: %w", err) + } + + return contents, nil +} + +// singleCPE returns a []pkg.CPE based on the cpe string or panics if the CPE is invalid +func singleCPE(cpe string) []pkg.CPE { + return []pkg.CPE{ + pkg.MustCPE(cpe), + } +} diff --git a/syft/pkg/cataloger/generic/classifier_test.go b/syft/pkg/cataloger/binary/classifier_test.go similarity index 61% rename from syft/pkg/cataloger/generic/classifier_test.go rename to syft/pkg/cataloger/binary/classifier_test.go index 77242e4fa..35cec08eb 100644 --- a/syft/pkg/cataloger/generic/classifier_test.go +++ b/syft/pkg/cataloger/binary/classifier_test.go @@ -1,7 +1,6 @@ -package generic +package binary import ( - "regexp" "testing" "github.com/stretchr/testify/require" @@ -14,35 +13,27 @@ func Test_ClassifierCPEs(t *testing.T) { tests := []struct { name string fixture string - classifier Classifier + classifier classifier cpes []string }{ { name: "no CPEs", fixture: "test-fixtures/version.txt", - classifier: Classifier{ - Package: "some-app", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(".*/version.txt"), - }, - EvidencePatterns: []*regexp.Regexp{ - regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`), - }, - CPEs: []pkg.CPE{}, + classifier: classifier{ + Package: "some-app", + FileGlob: ".*/version.txt", + EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`), + CPEs: []pkg.CPE{}, }, cpes: nil, }, { name: "one CPE", fixture: "test-fixtures/version.txt", - classifier: Classifier{ - Package: "some-app", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(".*/version.txt"), - }, - EvidencePatterns: []*regexp.Regexp{ - regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`), - }, + classifier: classifier{ + Package: "some-app", + FileGlob: ".*/version.txt", + EvidenceMatcher: 
fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`), CPEs: []pkg.CPE{ pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"), }, @@ -54,14 +45,10 @@ func Test_ClassifierCPEs(t *testing.T) { { name: "multiple CPEs", fixture: "test-fixtures/version.txt", - classifier: Classifier{ - Package: "some-app", - FilepathPatterns: []*regexp.Regexp{ - regexp.MustCompile(".*/version.txt"), - }, - EvidencePatterns: []*regexp.Regexp{ - regexp.MustCompile(`(?m)my-verison:(?P<version>[0-9.]+)`), - }, + classifier: classifier{ + Package: "some-app", + FileGlob: ".*/version.txt", + EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`), CPEs: []pkg.CPE{ pkg.MustCPE("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"), pkg.MustCPE("cpe:2.3:a:some:apps:*:*:*:*:*:*:*:*"), @@ -83,9 +70,13 @@ func Test_ClassifierCPEs(t *testing.T) { location := locations[0] readCloser, err := resolver.FileContentsByLocation(location) require.NoError(t, err) - p, _, err := test.classifier.Examine(source.NewLocationReadCloser(location, readCloser)) + pkgs, err := test.classifier.EvidenceMatcher(test.classifier, source.NewLocationReadCloser(location, readCloser)) require.NoError(t, err) + require.Len(t, pkgs, 1) + + p := pkgs[0] + var cpes []string for _, c := range p.CPEs { cpes = append(cpes, pkg.CPEString(c)) diff --git a/syft/pkg/cataloger/binary/default_classifiers.go b/syft/pkg/cataloger/binary/default_classifiers.go new file mode 100644 index 000000000..65f8de5a4 --- /dev/null +++ b/syft/pkg/cataloger/binary/default_classifiers.go @@ -0,0 +1,60 @@ +package binary + +import "github.com/anchore/syft/syft/pkg" + +var defaultClassifiers = []classifier{ + { + Class: "python-binary", + FileGlob: "**/python*", + EvidenceMatcher: fileNameTemplateVersionMatcher( + `(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`, + `(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`), + Package: "python", + }, + { + Class: "python-binary-lib", + FileGlob: "**/libpython*.so*", + EvidenceMatcher: fileNameTemplateVersionMatcher( + 
`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`, + `(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`), + Package: "python", + }, + { + Class: "cpython-source", + FileGlob: "**/patchlevel.h", + EvidenceMatcher: fileContentsVersionMatcher( + `(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`), + Package: "python", + }, + { + Class: "go-binary", + FileGlob: "**/go", + EvidenceMatcher: fileContentsVersionMatcher( + `(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`), + Package: "go", + }, + { + Class: "nodejs-binary", + FileGlob: "**/node", + EvidenceMatcher: fileContentsVersionMatcher( + `(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`), + Package: "node.js", + Language: pkg.JavaScript, + PURL: mustPURL("pkg:generic/node@version"), + CPEs: singleCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"), + }, + { + Class: "go-binary-hint", + FileGlob: "**/VERSION", + EvidenceMatcher: fileContentsVersionMatcher( + `(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`), + Package: "go", + }, + { + Class: "busybox-binary", + FileGlob: "**/busybox", + EvidenceMatcher: fileContentsVersionMatcher( + `(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`), + Package: "busybox", + }, +} diff --git a/syft/file/test-fixtures/classifiers/negative/.gitignore b/syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/.gitignore similarity index 100% rename from syft/file/test-fixtures/classifiers/negative/.gitignore rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/.gitignore diff --git a/syft/file/test-fixtures/classifiers/negative/busybox b/syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/busybox similarity index 100% rename from syft/file/test-fixtures/classifiers/negative/busybox rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/busybox diff --git a/syft/file/test-fixtures/classifiers/negative/go b/syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/go similarity index 100% rename from 
syft/file/test-fixtures/classifiers/negative/go rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/go diff --git a/syft/file/test-fixtures/classifiers/negative/libpython2.7.so b/syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/libpython2.7.so similarity index 100% rename from syft/file/test-fixtures/classifiers/negative/libpython2.7.so rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/libpython2.7.so diff --git a/syft/file/test-fixtures/classifiers/negative/python2.6 b/syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/python2.6 similarity index 100% rename from syft/file/test-fixtures/classifiers/negative/python2.6 rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/negative/python2.6 diff --git a/syft/file/test-fixtures/classifiers/positive/.gitignore b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/.gitignore similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/.gitignore rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/.gitignore diff --git a/syft/file/test-fixtures/classifiers/positive/VERSION b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/VERSION similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/VERSION rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/VERSION diff --git a/syft/file/test-fixtures/classifiers/positive/[ b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/[ similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/[ rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/[ diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/busybox similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/busybox rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/busybox diff --git 
a/syft/file/test-fixtures/classifiers/positive/go b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/go similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/go rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/go diff --git a/syft/file/test-fixtures/classifiers/positive/libpython3.7.so b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/libpython3.7.so similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/libpython3.7.so rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/libpython3.7.so diff --git a/syft/file/test-fixtures/classifiers/positive/patchlevel.h b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/patchlevel.h similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/patchlevel.h rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/patchlevel.h diff --git a/syft/file/test-fixtures/classifiers/positive/python3.6 b/syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/python3.6 similarity index 100% rename from syft/file/test-fixtures/classifiers/positive/python3.6 rename to syft/pkg/cataloger/binary/test-fixtures/classifiers/positive/python3.6 diff --git a/syft/file/test-fixtures/image-busybox/Dockerfile b/syft/pkg/cataloger/binary/test-fixtures/image-busybox/Dockerfile similarity index 100% rename from syft/file/test-fixtures/image-busybox/Dockerfile rename to syft/pkg/cataloger/binary/test-fixtures/image-busybox/Dockerfile diff --git a/syft/pkg/cataloger/generic/test-fixtures/version.txt b/syft/pkg/cataloger/binary/test-fixtures/version.txt similarity index 100% rename from syft/pkg/cataloger/generic/test-fixtures/version.txt rename to syft/pkg/cataloger/binary/test-fixtures/version.txt diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go index 907058b77..631ae7bb5 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -12,6 
+12,7 @@ import ( "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/alpm" "github.com/anchore/syft/syft/pkg/cataloger/apkdb" + "github.com/anchore/syft/syft/pkg/cataloger/binary" "github.com/anchore/syft/syft/pkg/cataloger/cpp" "github.com/anchore/syft/syft/pkg/cataloger/dart" "github.com/anchore/syft/syft/pkg/cataloger/deb" @@ -40,7 +41,6 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger { python.NewPythonPackageCataloger(), php.NewPHPComposerInstalledCataloger(), javascript.NewJavascriptPackageCataloger(), - javascript.NewNodeBinaryCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), java.NewJavaCataloger(cfg.Java()), @@ -49,6 +49,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger { dotnet.NewDotnetDepsCataloger(), portage.NewPortageCataloger(), sbom.NewSBOMCataloger(), + binary.NewCataloger(), }, cfg.Catalogers) } @@ -61,7 +62,6 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger { python.NewPythonPackageCataloger(), php.NewPHPComposerLockCataloger(), javascript.NewJavascriptLockCataloger(), - javascript.NewNodeBinaryCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), rpm.NewFileCataloger(), @@ -78,6 +78,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger { portage.NewPortageCataloger(), haskell.NewHackageCataloger(), sbom.NewSBOMCataloger(), + binary.NewCataloger(), }, cfg.Catalogers) } @@ -91,7 +92,6 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { python.NewPythonPackageCataloger(), javascript.NewJavascriptLockCataloger(), javascript.NewJavascriptPackageCataloger(), - javascript.NewNodeBinaryCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), rpm.NewFileCataloger(), @@ -111,6 +111,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { portage.NewPortageCataloger(), haskell.NewHackageCataloger(), sbom.NewSBOMCataloger(), + binary.NewCataloger(), }, cfg.Catalogers) } diff --git a/syft/pkg/cataloger/generic/classifier.go b/syft/pkg/cataloger/generic/classifier.go deleted file mode 
100644 index 7c5ade06e..000000000 --- a/syft/pkg/cataloger/generic/classifier.go +++ /dev/null @@ -1,100 +0,0 @@ -package generic - -import ( - "fmt" - "io" - "path" - "regexp" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" - "github.com/anchore/syft/syft/source" -) - -// Classifier is a generic package classifier that can be used to match a package definition -// to a file that meets the given content criteria of the EvidencePatternTemplates. -type Classifier struct { - Package string - // FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given - // source location. If any of the patterns match, the file will be considered a candidate for parsing. - // If no patterns are provided, the reader is automatically considered a candidate. - FilepathPatterns []*regexp.Regexp - // EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a - // given file in the source location. If any of the patterns match, the file will be considered a candidate for parsing. 
- EvidencePatterns []*regexp.Regexp - // CPEs are the specific CPEs we want to include for this binary with updated version information - CPEs []pkg.CPE -} - -func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) { - doesFilepathMatch := true - if len(c.FilepathPatterns) > 0 { - doesFilepathMatch, _ = file.FilepathMatches(c.FilepathPatterns, reader.Location) - } - - if !doesFilepathMatch { - return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package) - } - - contents, err := getContents(reader) - if err != nil { - return nil, nil, fmt.Errorf("unable to get read contents for file: %w", err) - } - - var classifiedPackage *pkg.Package - for _, evidencePattern := range c.EvidencePatterns { - if !evidencePattern.Match(contents) { - continue - } - - matchMetadata := internal.MatchNamedCaptureGroups(evidencePattern, string(contents)) - version, ok := matchMetadata["version"] - if !ok { - log.Debugf("no version found in binary from pattern %v", evidencePattern) - continue - } - - var cpes []pkg.CPE - for _, cpe := range c.CPEs { - cpe.Version = version - if err == nil { - cpes = append(cpes, cpe) - } - } - - classifiedPackage = &pkg.Package{ - Name: path.Base(reader.VirtualPath), - Version: version, - Language: pkg.Binary, - Locations: source.NewLocationSet(reader.Location), - Type: pkg.BinaryPkg, - CPEs: cpes, - MetadataType: pkg.BinaryMetadataType, - Metadata: pkg.BinaryMetadata{ - Classifier: c.Package, - RealPath: reader.RealPath, - VirtualPath: reader.VirtualPath, - }, - } - break - } - return classifiedPackage, nil, nil -} - -func getContents(reader source.LocationReadCloser) ([]byte, error) { - unionReader, err := unionreader.GetUnionReader(reader.ReadCloser) - if err != nil { - return nil, fmt.Errorf("unable to get union reader for file: %w", err) - } - - contents, err := io.ReadAll(unionReader) - if err != nil { - return nil, fmt.Errorf("unable to 
get contents for file: %w", err) - } - - return contents, nil -} diff --git a/syft/pkg/cataloger/javascript/cataloger.go b/syft/pkg/cataloger/javascript/cataloger.go index 3e5a93685..6688abcdc 100644 --- a/syft/pkg/cataloger/javascript/cataloger.go +++ b/syft/pkg/cataloger/javascript/cataloger.go @@ -4,7 +4,6 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e package javascript import ( - "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/pkg/cataloger/generic" ) @@ -20,8 +19,3 @@ func NewJavascriptLockCataloger() *generic.Cataloger { WithParserByGlobs(parseYarnLock, "**/yarn.lock"). WithParserByGlobs(parsePnpmLock, "**/pnpm-lock.yaml") } - -func NewNodeBinaryCataloger() *generic.Cataloger { - return generic.NewCataloger("node-binary-cataloger"). - WithParserByMimeTypes(parseNodeBinary, internal.ExecutableMIMETypeSet.List()...) -} diff --git a/syft/pkg/cataloger/javascript/parse_node_binary.go b/syft/pkg/cataloger/javascript/parse_node_binary.go deleted file mode 100644 index 51ec307c0..000000000 --- a/syft/pkg/cataloger/javascript/parse_node_binary.go +++ /dev/null @@ -1,43 +0,0 @@ -package javascript - -import ( - "regexp" - - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/generic" - "github.com/anchore/syft/syft/source" -) - -var nodeClassifier = generic.Classifier{ - Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching - FilepathPatterns: []*regexp.Regexp{ - // note: should we just parse all files resolved with executable mimetypes - // regexp that matches node binary - regexp.MustCompile(`(.*/|^)node$`), - }, - EvidencePatterns: []*regexp.Regexp{ - // regex that matches node.js/vx.y.z - regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`), - }, - CPEs: []pkg.CPE{ - pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"), - }, -} - -func 
parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - p, _, err := nodeClassifier.Examine(reader) - if err != nil { - log.Trace("failed to find node.js package: %+v", err) - return nil, nil, nil // we can silently fail here to reduce warning noise - } - - // TODO add node specific metadata to the packages to help with vulnerability matching - if p != nil { - p.Language = pkg.JavaScript - p.SetID() - return []pkg.Package{*p}, nil, nil - } - return nil, nil, nil -} diff --git a/syft/pkg/file_metadata.go b/syft/pkg/file_metadata.go new file mode 100644 index 000000000..d3d379ded --- /dev/null +++ b/syft/pkg/file_metadata.go @@ -0,0 +1,7 @@ +package pkg + +type FileMetadata struct { + Classifier string `mapstructure:"Classifier" json:"classifier"` + RealPath string `mapstructure:"RealPath" json:"realPath"` + VirtualPath string `mapstructure:"VirtualPath" json:"virtualPath"` +} diff --git a/syft/pkg/language.go b/syft/pkg/language.go index 9e8054d60..1adc0d3a6 100644 --- a/syft/pkg/language.go +++ b/syft/pkg/language.go @@ -25,6 +25,7 @@ const ( CPP Language = "c++" Haskell Language = "haskell" Binary Language = "binary" + File Language = "file" ) // AllLanguages is a set of all programming languages detected by syft. 
diff --git a/syft/sbom/sbom.go b/syft/sbom/sbom.go index 0f77567fc..f867c49f4 100644 --- a/syft/sbom/sbom.go +++ b/syft/sbom/sbom.go @@ -18,13 +18,12 @@ type SBOM struct { } type Artifacts struct { - PackageCatalog *pkg.Catalog - FileMetadata map[source.Coordinates]source.FileMetadata - FileDigests map[source.Coordinates][]file.Digest - FileClassifications map[source.Coordinates][]file.Classification - FileContents map[source.Coordinates]string - Secrets map[source.Coordinates][]file.SearchResult - LinuxDistribution *linux.Release + PackageCatalog *pkg.Catalog + FileMetadata map[source.Coordinates]source.FileMetadata + FileDigests map[source.Coordinates][]file.Digest + FileContents map[source.Coordinates]string + Secrets map[source.Coordinates][]file.SearchResult + LinuxDistribution *linux.Release } type Descriptor struct { @@ -55,9 +54,6 @@ func (s SBOM) AllCoordinates() []source.Coordinates { for coordinates := range s.Artifacts.FileContents { set.Add(coordinates) } - for coordinates := range s.Artifacts.FileClassifications { - set.Add(coordinates) - } for coordinates := range s.Artifacts.FileDigests { set.Add(coordinates) } diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index a3428ac33..fe57819e7 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -105,7 +105,7 @@ func TestPackagesCmdFlags(t *testing.T) { name: "squashed-scope-flag-hidden-packages", args: []string{"packages", "-o", "json", "-s", "squashed", hiddenPackagesImage}, assertions: []traitAssertion{ - assertPackageCount(162), + assertPackageCount(163), assertNotInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, }, @@ -114,7 +114,7 @@ func TestPackagesCmdFlags(t *testing.T) { name: "all-layers-scope-flag", args: []string{"packages", "-o", "json", "-s", "all-layers", hiddenPackagesImage}, assertions: []traitAssertion{ - assertPackageCount(163), // packages are now deduplicated for this case + assertPackageCount(164), // packages are 
now deduplicated for this case assertInOutput("all-layers"), assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, @@ -127,7 +127,7 @@ func TestPackagesCmdFlags(t *testing.T) { "SYFT_PACKAGE_CATALOGER_SCOPE": "all-layers", }, assertions: []traitAssertion{ - assertPackageCount(163), // packages are now deduplicated for this case + assertPackageCount(164), // packages are now deduplicated for this case assertInOutput("all-layers"), assertInOutput("vsftpd"), // hidden package assertSuccessfulReturnCode, diff --git a/test/integration/package_deduplication_test.go b/test/integration/package_deduplication_test.go index f00e854a6..90ed50b20 100644 --- a/test/integration/package_deduplication_test.go +++ b/test/integration/package_deduplication_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" ) @@ -21,7 +22,7 @@ func TestPackageDeduplication(t *testing.T) { }{ { scope: source.AllLayersScope, - packageCount: 172, // without deduplication this would be 618 + packageCount: 173, // without deduplication this would be 618 instanceCount: map[string]int{ "basesystem": 1, "wget": 1, @@ -40,7 +41,7 @@ func TestPackageDeduplication(t *testing.T) { }, { scope: source.SquashedScope, - packageCount: 170, + packageCount: 171, instanceCount: map[string]int{ "basesystem": 1, "wget": 1, @@ -62,6 +63,12 @@ func TestPackageDeduplication(t *testing.T) { t.Run(string(tt.scope), func(t *testing.T) { sbom, _ := catalogFixtureImage(t, "image-vertical-package-dups", tt.scope, nil) + for _, p := range sbom.Artifacts.PackageCatalog.Sorted() { + if p.Type == pkg.BinaryPkg { + assert.NotEmpty(t, p.Name) + } + } + assert.Equal(t, tt.packageCount, sbom.Artifacts.PackageCatalog.PackageCount()) for name, expectedInstanceCount := range tt.instanceCount { pkgs := sbom.Artifacts.PackageCatalog.PackagesByName(name)