add file classifier + tests

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-04-07 15:48:11 -04:00
parent 5743e32e02
commit 46bfb68113
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
12 changed files with 453 additions and 0 deletions

View File

@ -0,0 +1,164 @@
package file
import (
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestClassifierCataloger_DefaultClassifiers_PositiveCases catalogs the
// positive fixture directory with DefaultClassifiers and verifies that each
// fixture file is reported with exactly the expected classification.
//
// NOTE(review): no fixture containing "3.7.4a-vZ9" is visible next to this
// test -- confirm that libpython3.7.so is actually committed (for example,
// that it is not dropped by an ignore rule covering *.so files).
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
	// versioned builds the expectation shape shared by every case: a single
	// classification of the given class whose metadata holds only a version.
	versioned := func(class, version string) []Classification {
		return []Classification{
			{
				Class:    class,
				Metadata: map[string]string{"version": version},
			},
		}
	}

	cases := []struct {
		name           string
		fixtureDir     string
		location       string
		expected       []Classification
		constructorErr bool
		catalogErr     bool
	}{
		{
			name:       "positive-libpython3.7.so",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/libpython3.7.so",
			expected:   versioned("python-binary", "3.7.4a-vZ9"),
		},
		{
			name:       "positive-python3.6",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/python3.6",
			expected:   versioned("python-binary", "3.6.3a-vZ9"),
		},
		{
			name:       "positive-patchlevel.h",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/patchlevel.h",
			expected:   versioned("cpython-source", "3.9-aZ5"),
		},
		{
			name:       "positive-go",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/go",
			expected:   versioned("go-binary", "1.14"),
		},
		{
			name:       "positive-go-hint",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/VERSION",
			expected:   versioned("go-binary-hint", "1.15"),
		},
		{
			name:       "positive-busybox",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/busybox",
			expected:   versioned("busybox-binary", "3.33.3"),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cataloger, err := NewClassificationCataloger(DefaultClassifiers)
			switch {
			case err != nil && !tc.constructorErr:
				t.Fatalf("could not create cataloger (but should have been able to): %+v", err)
			case err == nil && tc.constructorErr:
				t.Fatalf("expected constructor error but did not get one")
			case err != nil:
				// The anticipated constructor failure happened; nothing else to verify.
				return
			}

			src, err := source.NewFromDirectory(tc.fixtureDir)
			if err != nil {
				t.Fatalf("could not create source: %+v", err)
			}

			resolver, err := src.FileResolver(source.SquashedScope)
			if err != nil {
				t.Fatalf("could not create resolver: %+v", err)
			}

			results, err := cataloger.Catalog(resolver)
			switch {
			case err != nil && !tc.catalogErr:
				t.Fatalf("could not catalog (but should have been able to): %+v", err)
			case err == nil && tc.catalogErr:
				t.Fatalf("expected catalog error but did not get one")
			case err != nil:
				// The anticipated catalog failure happened; nothing else to verify.
				return
			}

			got, found := results[source.NewLocation(tc.location)]
			if !found {
				t.Fatalf("could not find test location=%q", tc.location)
			}

			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestClassifierCataloger_DefaultClassifiers_NegativeCases catalogs the
// negative fixture directory with DefaultClassifiers and requires that no
// location receives any classification.
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
	cataloger, err := NewClassificationCataloger(DefaultClassifiers)
	if err != nil {
		t.Fatalf("could not create cataloger: %+v", err)
	}

	negativeSource, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
	if err != nil {
		t.Fatalf("could not create source: %+v", err)
	}

	squashed, err := negativeSource.FileResolver(source.SquashedScope)
	if err != nil {
		t.Fatalf("could not create resolver: %+v", err)
	}

	classifications, err := cataloger.Catalog(squashed)
	if err != nil {
		t.Fatalf("could not catalog: %+v", err)
	}

	// Every fixture in this directory is expected to be rejected.
	assert.Equal(t, 0, len(classifications))
}

View File

@ -0,0 +1,38 @@
package file
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// ClassificationCataloger applies a fixed list of classifiers to the
// locations exposed by a file resolver (see Catalog).
type ClassificationCataloger struct {
	// classifiers are evaluated, in order, against every location visited by Catalog.
	classifiers []Classifier
}
// NewClassificationCataloger returns a cataloger that evaluates the given
// classifiers. The slice is stored as-is (not copied). The error result is
// always nil in this implementation.
func NewClassificationCataloger(classifiers []Classifier) (*ClassificationCataloger, error) {
	cataloger := ClassificationCataloger{classifiers: classifiers}
	return &cataloger, nil
}
// Catalog runs every configured classifier against every location yielded by
// resolver.AllLocations() and groups the resulting classifications by
// location. Locations with no classification do not appear in the returned
// map.
//
// The first error returned by any classifier aborts the run: the partial
// results are discarded and the error is returned unchanged.
func (c *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[source.Location][]Classification, error) {
	var total int
	found := make(map[source.Location][]Classification)

	for loc := range resolver.AllLocations() {
		for idx := range c.classifiers {
			classification, err := c.classifiers[idx].Classify(resolver, loc)
			if err != nil {
				return nil, err
			}
			if classification == nil {
				// This classifier does not apply to the location.
				continue
			}
			found[loc] = append(found[loc], *classification)
			total++
		}
	}

	log.Debugf("classification cataloger discovered %d results", total)
	return found, nil
}

141
syft/file/classifier.go Normal file
View File

@ -0,0 +1,141 @@
package file
import (
"bytes"
"fmt"
"io/ioutil"
"regexp"
"text/template"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/source"
)
// DefaultClassifiers is the built-in set of classifiers. Each entry pairs
// path patterns (which select candidate files) with evidence pattern
// templates (which are matched against the file contents and supply the
// "version" metadata through a named capture group).
//
// Literal dots in file names are escaped so that, for example, only
// "patchlevel.h" is selected and not "patchlevelxh".
var DefaultClassifiers = []Classifier{
	{
		Class: "python-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`),
			regexp.MustCompile(`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+)\.so.*$`),
		},
		EvidencePatternTemplates: []string{
			// {{ .version }} is the major.minor version captured from the path.
			`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`,
		},
	},
	{
		Class: "cpython-source",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)patchlevel\.h$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`,
		},
	},
	{
		Class: "go-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)go$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
		},
	},
	{
		Class: "go-binary-hint",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)VERSION$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
		},
	},
	{
		Class: "busybox-binary",
		FilepathPatterns: []*regexp.Regexp{
			regexp.MustCompile(`(.*/|^)busybox$`),
		},
		EvidencePatternTemplates: []string{
			`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
		},
	},
}
// Classifier describes how to recognize one class of file: first by its path,
// then by evidence found in its contents.
type Classifier struct {
	// Class is the label copied into every Classification this classifier produces.
	Class string

	// FilepathPatterns select candidate files. A location is a candidate when
	// any pattern matches its RealPath or, failing that, its VirtualPath. The
	// capture groups of the first matching pattern are passed to the evidence
	// templates.
	FilepathPatterns []*regexp.Regexp

	// EvidencePatternTemplates are text/template strings that are rendered
	// with the path capture groups, compiled as regular expressions, and
	// matched against the file contents.
	EvidencePatternTemplates []string
}
// Classification is the outcome of a successful Classifier match on a file.
type Classification struct {
	// Class is the Class of the classifier that matched.
	Class string `json:"class"`

	// Metadata holds the capture-group values extracted from the file contents
	// by the matching evidence patterns (for example "version").
	Metadata map[string]string `json:"metadata"`
}
// Classify decides whether the file at location belongs to this classifier's
// class.
//
// The location's path must first match one of c.FilepathPatterns; otherwise
// (nil, nil) is returned without reading the file. The file contents are then
// read in full and every evidence template is rendered with the values
// captured from the path, compiled, and matched against the contents. The
// metadata of all matching evidence patterns is merged into one
// Classification, with later patterns overwriting keys set by earlier ones.
// If no evidence pattern matches, (nil, nil) is returned.
//
// An error is returned when the contents cannot be fetched or read, or when a
// template cannot be parsed, rendered, or compiled into a regular expression.
func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
	doesFilepathMatch, filepathNamedGroupValues := filepathMatches(c.FilepathPatterns, location)
	if !doesFilepathMatch {
		return nil, nil
	}

	contentReader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer contentReader.Close()

	// TODO: there is room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
	contents, err := ioutil.ReadAll(contentReader)
	if err != nil {
		return nil, err
	}

	// Path-derived values are spliced into regular expressions, so they must
	// be escaped: an unescaped "3.7" would also accept "3x7" in the contents.
	templateValues := make(map[string]string, len(filepathNamedGroupValues))
	for name, value := range filepathNamedGroupValues {
		templateValues[name] = regexp.QuoteMeta(value)
	}

	var result *Classification
	for _, patternTemplate := range c.EvidencePatternTemplates {
		pattern, err := compileEvidencePattern(patternTemplate, templateValues)
		if err != nil {
			return nil, err
		}

		if !pattern.Match(contents) {
			continue
		}

		matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
		if result == nil {
			result = &Classification{
				Class:    c.Class,
				Metadata: matchMetadata,
			}
			continue
		}

		// The first match may have produced a nil map; writing to it would panic.
		if result.Metadata == nil && len(matchMetadata) > 0 {
			result.Metadata = make(map[string]string, len(matchMetadata))
		}
		for key, value := range matchMetadata {
			result.Metadata[key] = value
		}
	}
	return result, nil
}

// compileEvidencePattern renders patternTemplate with values and compiles the
// rendered text as a regular expression.
func compileEvidencePattern(patternTemplate string, values map[string]string) (*regexp.Regexp, error) {
	tmpl, err := template.New("").Parse(patternTemplate)
	if err != nil {
		return nil, fmt.Errorf("unable to parse classifier template=%q : %w", patternTemplate, err)
	}

	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, values); err != nil {
		return nil, fmt.Errorf("unable to render template: %w", err)
	}

	pattern, err := regexp.Compile(rendered.String())
	if err != nil {
		return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", rendered.String(), err)
	}
	return pattern, nil
}
// filepathMatches reports whether any of the patterns matches the location's
// RealPath or, failing that, its VirtualPath; empty paths are skipped. On the
// first match it also returns the capture groups extracted from the matched
// path by internal.MatchNamedCaptureGroups. Without a match it returns
// (false, nil).
func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
	// RealPath is listed first, so it takes precedence over VirtualPath.
	candidates := [...]string{location.RealPath, location.VirtualPath}

	for idx := range candidates {
		candidate := candidates[idx]
		if candidate == "" {
			continue
		}
		for _, re := range patterns {
			if !re.MatchString(candidate) {
				continue
			}
			return true, internal.MatchNamedCaptureGroups(re, candidate)
		}
	}

	return false, nil
}

View File

@ -0,0 +1,90 @@
package file
import (
"regexp"
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestFilepathMatches exercises filepathMatches with bare file names, full
// paths, and virtual paths, plus two cases that must not match: a pattern
// anchored at the start applied to a full path, and a location with no paths.
func TestFilepathMatches(t *testing.T) {
	cases := []struct {
		name                string
		location            source.Location
		patterns            []string
		expectedMatches     bool
		expectedNamedGroups map[string]string
	}{
		{
			name:            "simple-filename-match",
			location:        source.Location{RealPath: "python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "filepath-match",
			location:        source.Location{RealPath: "/usr/bin/python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "virtual-filepath-match",
			location:        source.Location{VirtualPath: "/usr/bin/python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "full-filepath-match",
			location:        source.Location{VirtualPath: "/usr/bin/python2.7"},
			patterns:        []string{`.*/bin/python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "anchored-filename-match-FAILS",
			location:        source.Location{RealPath: "/usr/bin/python2.7"},
			patterns:        []string{`^python([0-9]+\.[0-9]+)$`},
			expectedMatches: false,
		},
		{
			name:            "empty-filename-match-FAILS",
			location:        source.Location{},
			patterns:        []string{`^python([0-9]+\.[0-9]+)$`},
			expectedMatches: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Compile the case's raw expressions into the form filepathMatches expects.
			var compiled []*regexp.Regexp
			for idx := range tc.patterns {
				compiled = append(compiled, regexp.MustCompile(tc.patterns[idx]))
			}

			gotMatches, gotNamedGroups := filepathMatches(compiled, tc.location)

			assert.Equal(t, tc.expectedMatches, gotMatches)
			assert.Equal(t, tc.expectedNamedGroups, gotNamedGroups)
		})
	}
}

View File

@ -0,0 +1 @@
another bad binary

View File

@ -0,0 +1 @@
a bad go binary

View File

@ -0,0 +1,3 @@
# note: this should NOT match
just some noise

View File

@ -0,0 +1 @@
go1.15-beta2

View File

@ -0,0 +1,3 @@
# note: this SHOULD match as busybox 3.33.3
noise!BusyBox v3.33.3!noise

View File

@ -0,0 +1 @@
go1.14

View File

@ -0,0 +1,7 @@
# note: this SHOULD match as python 3.9
some source code...
#define PY_VERSION 3.9-aZ5
more source!

View File

@ -0,0 +1,3 @@
# note: this SHOULD match as python 3.6
noise3.6.3a-vZ9!morenoise