From 46bfb6811338b7566550bfe0cc7c9c0bc3ae65c1 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 7 Apr 2021 15:48:11 -0400 Subject: [PATCH] add file classifier + tests Signed-off-by: Alex Goodman --- syft/file/classfication_cataloger_test.go | 164 ++++++++++++++++++ syft/file/classification_cataloger.go | 38 ++++ syft/file/classifier.go | 141 +++++++++++++++ syft/file/classifier_test.go | 90 ++++++++++ .../classifiers/negative/busybox | 1 + .../test-fixtures/classifiers/negative/go | 1 + .../classifiers/negative/python2.6 | 3 + .../classifiers/positive/VERSION | 1 + .../classifiers/positive/busybox | 3 + .../test-fixtures/classifiers/positive/go | 1 + .../classifiers/positive/patchlevel.h | 7 + .../classifiers/positive/python3.6 | 3 + 12 files changed, 453 insertions(+) create mode 100644 syft/file/classfication_cataloger_test.go create mode 100644 syft/file/classification_cataloger.go create mode 100644 syft/file/classifier.go create mode 100644 syft/file/classifier_test.go create mode 100644 syft/file/test-fixtures/classifiers/negative/busybox create mode 100644 syft/file/test-fixtures/classifiers/negative/go create mode 100644 syft/file/test-fixtures/classifiers/negative/python2.6 create mode 100644 syft/file/test-fixtures/classifiers/positive/VERSION create mode 100644 syft/file/test-fixtures/classifiers/positive/busybox create mode 100644 syft/file/test-fixtures/classifiers/positive/go create mode 100644 syft/file/test-fixtures/classifiers/positive/patchlevel.h create mode 100644 syft/file/test-fixtures/classifiers/positive/python3.6 diff --git a/syft/file/classfication_cataloger_test.go b/syft/file/classfication_cataloger_test.go new file mode 100644 index 000000000..59c301c24 --- /dev/null +++ b/syft/file/classfication_cataloger_test.go @@ -0,0 +1,164 @@ +package file + +import ( + "testing" + + "github.com/anchore/syft/syft/source" + "github.com/stretchr/testify/assert" +) + +func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { 
+ tests := []struct { + name string + fixtureDir string + location string + expected []Classification + constructorErr bool + catalogErr bool + }{ + { + name: "positive-libpython3.7.so", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/libpython3.7.so", + expected: []Classification{ + { + Class: "python-binary", + Metadata: map[string]string{ + "version": "3.7.4a-vZ9", + }, + }, + }, + }, + { + name: "positive-python3.6", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/python3.6", + expected: []Classification{ + { + Class: "python-binary", + Metadata: map[string]string{ + "version": "3.6.3a-vZ9", + }, + }, + }, + }, + { + name: "positive-patchlevel.h", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/patchlevel.h", + expected: []Classification{ + { + Class: "cpython-source", + Metadata: map[string]string{ + "version": "3.9-aZ5", + }, + }, + }, + }, + { + name: "positive-go", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/go", + expected: []Classification{ + { + Class: "go-binary", + Metadata: map[string]string{ + "version": "1.14", + }, + }, + }, + }, + { + name: "positive-go-hint", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/VERSION", + expected: []Classification{ + { + Class: "go-binary-hint", + Metadata: map[string]string{ + "version": "1.15", + }, + }, + }, + }, + { + name: "positive-busybox", + fixtureDir: "test-fixtures/classifiers/positive", + location: "test-fixtures/classifiers/positive/busybox", + expected: []Classification{ + { + Class: "busybox-binary", + Metadata: map[string]string{ + "version": "3.33.3", + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + + c, err := NewClassificationCataloger(DefaultClassifiers) + if err != nil && 
!test.constructorErr { + t.Fatalf("could not create cataloger (but should have been able to): %+v", err) + } else if err == nil && test.constructorErr { + t.Fatalf("expected constructor error but did not get one") + } else if test.constructorErr && err != nil { + return + } + + src, err := source.NewFromDirectory(test.fixtureDir) + if err != nil { + t.Fatalf("could not create source: %+v", err) + } + + resolver, err := src.FileResolver(source.SquashedScope) + if err != nil { + t.Fatalf("could not create resolver: %+v", err) + } + + actualResults, err := c.Catalog(resolver) + if err != nil && !test.catalogErr { + t.Fatalf("could not catalog (but should have been able to): %+v", err) + } else if err == nil && test.catalogErr { + t.Fatalf("expected catalog error but did not get one") + } else if test.catalogErr && err != nil { + return + } + + loc := source.NewLocation(test.location) + + if _, ok := actualResults[loc]; !ok { + t.Fatalf("could not find test location=%q", test.location) + } + + assert.Equal(t, test.expected, actualResults[loc]) + }) + } +} + +func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) { + + c, err := NewClassificationCataloger(DefaultClassifiers) + if err != nil { + t.Fatalf("could not create cataloger: %+v", err) + } + + src, err := source.NewFromDirectory("test-fixtures/classifiers/negative") + if err != nil { + t.Fatalf("could not create source: %+v", err) + } + + resolver, err := src.FileResolver(source.SquashedScope) + if err != nil { + t.Fatalf("could not create resolver: %+v", err) + } + + actualResults, err := c.Catalog(resolver) + if err != nil { + t.Fatalf("could not catalog: %+v", err) + } + assert.Equal(t, 0, len(actualResults)) + +} diff --git a/syft/file/classification_cataloger.go b/syft/file/classification_cataloger.go new file mode 100644 index 000000000..f5cee7342 --- /dev/null +++ b/syft/file/classification_cataloger.go @@ -0,0 +1,38 @@ +package file + +import ( + "github.com/anchore/syft/internal/log" 
// DefaultClassifiers is the built-in set of classifiers.
//
// Each entry pairs file path patterns with evidence pattern templates that are applied to the file
// contents. Every evidence template captures the discovered version in a named group called "version".
// Path patterns may also capture a "version" group, which is made available to the evidence templates
// as {{ .version }}.
var DefaultClassifiers = []Classifier{
	{
		Class: "python-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`),
			regexp.MustCompile(`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`),
		},
		EvidencePatternTemplates: []string{
			// the major.minor version comes from the file name; the contents supply the patch level
			`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`,
		},
	},
	{
		Class: "cpython-source",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)patchlevel.h$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`,
		},
	},
	{
		Class: "go-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)go$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
		},
	},
	{
		Class: "go-binary-hint",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)VERSION$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
		},
	},
	{
		Class: "busybox-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)busybox$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
		},
	},
}

// Classifier describes how to recognize one class of file.
type Classifier struct {
	// Class is the label recorded on every Classification this classifier produces.
	Class string
	// FilepathPatterns are matched against a file's path; a file is only inspected further when at
	// least one pattern matches. Named capture groups become values for EvidencePatternTemplates.
	FilepathPatterns []*regexp.Regexp
	// EvidencePatternTemplates are text/template strings that render to regular expressions which are
	// matched against the file contents. Named capture groups become Classification metadata.
	EvidencePatternTemplates []string
}

// Classification is the result of a Classifier matching a file.
type Classification struct {
	// Class is copied from the Classifier that produced the result.
	Class string `json:"class"`
	// Metadata holds the named capture groups found in the file contents.
	Metadata map[string]string `json:"metadata"`
}
+ contents, err := ioutil.ReadAll(contentReader) + if err != nil { + return nil, err + } + + var result *Classification + for _, patternTemplate := range c.EvidencePatternTemplates { + tmpl, err := template.New("").Parse(patternTemplate) + if err != nil { + return nil, fmt.Errorf("unable to parse classifier template=%q : %w", patternTemplate, err) + } + + patternBuf := &bytes.Buffer{} + err = tmpl.Execute(patternBuf, filepathNamedGroupValues) + if err != nil { + return nil, fmt.Errorf("unable to render template: %w", err) + } + + pattern, err := regexp.Compile(patternBuf.String()) + if err != nil { + return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) + } + + if !pattern.Match(contents) { + continue + } + + matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents)) + if result == nil { + result = &Classification{ + Class: c.Class, + Metadata: matchMetadata, + } + } else { + for key, value := range matchMetadata { + result.Metadata[key] = value + } + } + } + return result, nil +} + +func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) { + for _, path := range []string{location.RealPath, location.VirtualPath} { + if path == "" { + continue + } + for _, pattern := range patterns { + if pattern.MatchString(path) { + return true, internal.MatchNamedCaptureGroups(pattern, path) + } + } + } + return false, nil +} diff --git a/syft/file/classifier_test.go b/syft/file/classifier_test.go new file mode 100644 index 000000000..1d1354a65 --- /dev/null +++ b/syft/file/classifier_test.go @@ -0,0 +1,90 @@ +package file + +import ( + "regexp" + "testing" + + "github.com/anchore/syft/syft/source" + "github.com/stretchr/testify/assert" +) + +func TestFilepathMatches(t *testing.T) { + tests := []struct { + name string + location source.Location + patterns []string + expectedMatches bool + expectedNamedGroups map[string]string + }{ + { + name: "simple-filename-match", + location: 
source.Location{ + RealPath: "python2.7", + }, + patterns: []string{ + `python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: true, + }, + { + name: "filepath-match", + location: source.Location{ + RealPath: "/usr/bin/python2.7", + }, + patterns: []string{ + `python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: true, + }, + { + name: "virtual-filepath-match", + location: source.Location{ + VirtualPath: "/usr/bin/python2.7", + }, + patterns: []string{ + `python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: true, + }, + { + name: "full-filepath-match", + location: source.Location{ + VirtualPath: "/usr/bin/python2.7", + }, + patterns: []string{ + `.*/bin/python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: true, + }, + { + name: "anchored-filename-match-FAILS", + location: source.Location{ + RealPath: "/usr/bin/python2.7", + }, + patterns: []string{ + `^python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: false, + }, + { + name: "empty-filename-match-FAILS", + location: source.Location{}, + patterns: []string{ + `^python([0-9]+\.[0-9]+)$`, + }, + expectedMatches: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var patterns []*regexp.Regexp + for _, p := range test.patterns { + patterns = append(patterns, regexp.MustCompile(p)) + } + actualMatches, actualNamedGroups := filepathMatches(patterns, test.location) + assert.Equal(t, test.expectedMatches, actualMatches) + assert.Equal(t, test.expectedNamedGroups, actualNamedGroups) + }) + } +} diff --git a/syft/file/test-fixtures/classifiers/negative/busybox b/syft/file/test-fixtures/classifiers/negative/busybox new file mode 100644 index 000000000..ac2bb305c --- /dev/null +++ b/syft/file/test-fixtures/classifiers/negative/busybox @@ -0,0 +1 @@ +another bad binary \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/negative/go b/syft/file/test-fixtures/classifiers/negative/go new file mode 100644 index 000000000..adbbb757b --- /dev/null +++ 
b/syft/file/test-fixtures/classifiers/negative/go @@ -0,0 +1 @@ +a bad go binary \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/negative/python2.6 b/syft/file/test-fixtures/classifiers/negative/python2.6 new file mode 100644 index 000000000..94639f34e --- /dev/null +++ b/syft/file/test-fixtures/classifiers/negative/python2.6 @@ -0,0 +1,3 @@ +# note: this should NOT match + +just some noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/VERSION b/syft/file/test-fixtures/classifiers/positive/VERSION new file mode 100644 index 000000000..5bedbed9f --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/VERSION @@ -0,0 +1 @@ +go1.15-beta2 \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/busybox b/syft/file/test-fixtures/classifiers/positive/busybox new file mode 100644 index 000000000..7829d71b9 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/busybox @@ -0,0 +1,3 @@ +# note: this SHOULD match as busybox 3.33.3 + +noise!BusyBox v3.33.3!noise \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/go b/syft/file/test-fixtures/classifiers/positive/go new file mode 100644 index 000000000..56c6f3c7a --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/go @@ -0,0 +1 @@ +go1.14 \ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/patchlevel.h b/syft/file/test-fixtures/classifiers/positive/patchlevel.h new file mode 100644 index 000000000..c4245addd --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/patchlevel.h @@ -0,0 +1,7 @@ +# note: this SHOULD match as python 3.9 + +some source code... + +#define PY_VERSION 3.9-aZ5 + +more source! 
\ No newline at end of file diff --git a/syft/file/test-fixtures/classifiers/positive/python3.6 b/syft/file/test-fixtures/classifiers/positive/python3.6 new file mode 100644 index 000000000..fecd79a46 --- /dev/null +++ b/syft/file/test-fixtures/classifiers/positive/python3.6 @@ -0,0 +1,3 @@ +# note: this SHOULD match as python 3.6 + +noise3.6.3a-vZ9!morenoise \ No newline at end of file