Merge pull request #374 from anchore/add-binary-classifier

Add file classification cataloger
This commit is contained in:
Alex Goodman 2021-04-12 17:25:16 -04:00 committed by GitHub
commit 9ad786d608
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 1637 additions and 59 deletions

View File

@ -108,6 +108,17 @@ package:
# same as -s ; SYFT_PACKAGE_CATALOGER_SCOPE env var
scope: "squashed"
# cataloging file classifications is exposed through the power-user subcommand
file-classification:
cataloger:
# enable/disable cataloging of file classifications
# SYFT_FILE_CLASSIFICATION_CATALOGER_ENABLED env var
enabled: true
# the search space to look for file classifications (options: all-layers, squashed)
# SYFT_FILE_CLASSIFICATION_CATALOGER_SCOPE env var
scope: "squashed"
# cataloging file metadata is exposed through the power-user subcommand
file-metadata:
cataloger:

View File

@ -20,6 +20,7 @@ func powerUserTasks() ([]powerUserTask, error) {
catalogFileMetadataTask,
catalogFileDigestsTask,
catalogSecretsTask,
catalogFileClassificationsTask,
}
for _, generator := range generators {
@ -156,3 +157,31 @@ func catalogSecretsTask() (powerUserTask, error) {
return task, nil
}
// catalogFileClassificationsTask builds the power-user task that runs the file classification cataloger
// against a source and stores the outcome on the JSON document config. When the cataloger is disabled in the
// application config, no task (nil) and no error are returned.
func catalogFileClassificationsTask() (powerUserTask, error) {
	if !appConfig.FileClassification.Cataloger.Enabled {
		return nil, nil
	}

	// TODO: in the future the classifiers could be exposed via configuration
	cataloger, err := file.NewClassificationCataloger(file.DefaultClassifiers)
	if err != nil {
		return nil, err
	}

	return func(doc *poweruser.JSONDocumentConfig, src source.Source) error {
		// the resolver is scoped by the configured search space for file classifications
		resolver, err := src.FileResolver(appConfig.FileClassification.Cataloger.ScopeOpt)
		if err != nil {
			return err
		}

		classifications, err := cataloger.Catalog(resolver)
		if err != nil {
			return err
		}

		// only record results once cataloging has fully succeeded
		doc.FileClassifications = classifications
		return nil
	}, nil
}

View File

@ -27,17 +27,18 @@ type parser interface {
// Application is the main syft application configuration.
type Application struct {
ConfigPath string `yaml:",omitempty" json:"configPath"` // the location where the application config was read from (either from -c or discovered while loading)
Output string `yaml:"output" json:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI)
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config)
Dev Development `yaml:"dev" json:"dev" mapstructure:"dev"`
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Anchore anchore `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise
Package Packages `yaml:"package" json:"package" mapstructure:"package"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
Secrets Secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
ConfigPath string `yaml:",omitempty" json:"configPath"` // the location where the application config was read from (either from -c or discovered while loading)
Output string `yaml:"output" json:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI)
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Anchore anchore `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise
CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config)
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Package packages `yaml:"package" json:"package" mapstructure:"package"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
}
func newApplicationConfig(v *viper.Viper, cliOpts CliOnlyOptions) *Application {

View File

@ -2,12 +2,12 @@ package config
import "github.com/spf13/viper"
type Development struct {
type development struct {
ProfileCPU bool `yaml:"profile-cpu" json:"profile-cpu" mapstructure:"profile-cpu"`
ProfileMem bool `yaml:"profile-mem" json:"profile-mem" mapstructure:"profile-mem"`
}
func (cfg Development) loadDefaultValues(v *viper.Viper) {
func (cfg development) loadDefaultValues(v *viper.Viper) {
v.SetDefault("dev.profile-cpu", false)
v.SetDefault("dev.profile-mem", false)
}

View File

@ -0,0 +1,19 @@
package config
import (
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
// fileClassification is the "file-classification" section of the application configuration.
type fileClassification struct {
// Cataloger holds the cataloger options, read from the "cataloger" key of this section.
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
}
// loadDefaultValues registers the defaults for the file-classification section on the given viper instance:
// the cataloger is enabled and searches the squashed scope.
func (cfg fileClassification) loadDefaultValues(v *viper.Viper) {
v.SetDefault("file-classification.cataloger.enabled", true)
v.SetDefault("file-classification.cataloger.scope", source.SquashedScope)
}
// parseConfigValues delegates to the cataloger options' own parseConfigValues and returns its error unchanged.
func (cfg *fileClassification) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

View File

@ -2,14 +2,14 @@ package config
import "github.com/spf13/viper"
type Packages struct {
type packages struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
}
func (cfg Packages) loadDefaultValues(v *viper.Viper) {
func (cfg packages) loadDefaultValues(v *viper.Viper) {
v.SetDefault("package.cataloger.enabled", true)
}
func (cfg *Packages) parseConfigValues() error {
func (cfg *packages) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

View File

@ -6,7 +6,7 @@ import (
"github.com/spf13/viper"
)
type Secrets struct {
type secrets struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
AdditionalPatterns map[string]string `yaml:"additional-patterns" json:"additional-patterns" mapstructure:"additional-patterns"`
ExcludePatternNames []string `yaml:"exclude-pattern-names" json:"exclude-pattern-names" mapstructure:"exclude-pattern-names"`
@ -14,7 +14,7 @@ type Secrets struct {
SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
}
func (cfg Secrets) loadDefaultValues(v *viper.Viper) {
func (cfg secrets) loadDefaultValues(v *viper.Viper) {
v.SetDefault("secrets.cataloger.enabled", true)
v.SetDefault("secrets.cataloger.scope", source.AllLayersScope)
v.SetDefault("secrets.reveal-values", false)
@ -23,6 +23,6 @@ func (cfg Secrets) loadDefaultValues(v *viper.Viper) {
v.SetDefault("secrets.exclude-pattern-names", []string{})
}
func (cfg *Secrets) parseConfigValues() error {
func (cfg *secrets) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

View File

@ -1,15 +0,0 @@
package internal
import "regexp"
// MatchCaptureGroups takes a regular expression and string and returns the capture group results of the first
// match in a map keyed by capture group name.
//
// Unnamed capture groups are stored under the empty-string key (a later unnamed group overwrites an earlier
// one). When the expression does not match, an empty (non-nil) map is returned.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)
	match := regEx.FindStringSubmatch(str)
	for i, name := range regEx.SubexpNames() {
		// index 0 is the whole match rather than a capture group; the upper bound must be strict because
		// match[len(match)] does not exist (and match is nil when nothing matched).
		if i > 0 && i < len(match) {
			results[name] = match[i]
		}
	}
	return results
}

View File

@ -9,8 +9,9 @@ type JSONDocument struct {
// here should be optional by supplying "omitempty" on these fields hint to the jsonschema generator to not
// require these fields. As an accepted rule in this repo all collections should still be initialized in the
// context of being used in a JSON document.
FileMetadata []JSONFileMetadata `json:"fileMetadata,omitempty"` // note: must have omitempty
Secrets []JSONSecrets `json:"secrets,omitempty"` // note: must have omitempty
FileClassifications []JSONFileClassifications `json:"fileClassifications,omitempty"` // note: must have omitempty
FileMetadata []JSONFileMetadata `json:"fileMetadata,omitempty"` // note: must have omitempty
Secrets []JSONSecrets `json:"secrets,omitempty"` // note: must have omitempty
packages.JSONDocument
}
@ -27,8 +28,9 @@ func NewJSONDocument(config JSONDocumentConfig) (JSONDocument, error) {
}
return JSONDocument{
FileMetadata: fileMetadata,
Secrets: NewJSONSecrets(config.Secrets),
JSONDocument: pkgsDoc,
FileClassifications: NewJSONFileClassifications(config.FileClassifications),
FileMetadata: fileMetadata,
Secrets: NewJSONSecrets(config.Secrets),
JSONDocument: pkgsDoc,
}, nil
}

View File

@ -9,11 +9,12 @@ import (
)
type JSONDocumentConfig struct {
ApplicationConfig config.Application
PackageCatalog *pkg.Catalog
FileMetadata map[source.Location]source.FileMetadata
FileDigests map[source.Location][]file.Digest
Secrets map[source.Location][]file.SearchResult
Distro *distro.Distro
SourceMetadata source.Metadata
ApplicationConfig config.Application
PackageCatalog *pkg.Catalog
FileMetadata map[source.Location]source.FileMetadata
FileDigests map[source.Location][]file.Digest
FileClassifications map[source.Location][]file.Classification
Secrets map[source.Location][]file.SearchResult
Distro *distro.Distro
SourceMetadata source.Metadata
}

View File

@ -0,0 +1,34 @@
package poweruser
import (
"sort"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/source"
)
// JSONFileClassifications is a single JSON document entry pairing one file location with one classification
// found at that location.
type JSONFileClassifications struct {
// Location is the file the classification applies to (serialized as "location").
Location source.Location `json:"location"`
// Classification is the classification result for the file (serialized as "classification").
Classification file.Classification `json:"classification"`
}
// NewJSONFileClassifications flattens the per-location classification results into a list holding one entry
// per (location, classification) pair. The returned slice is never nil and is ordered by real path, then by
// virtual path, so that output is stable across runs.
func NewJSONFileClassifications(data map[source.Location][]file.Classification) []JSONFileClassifications {
	entries := make([]JSONFileClassifications, 0)
	for loc, found := range data {
		for idx := range found {
			entry := JSONFileClassifications{
				Location:       loc,
				Classification: found[idx],
			}
			entries = append(entries, entry)
		}
	}

	// map iteration order is random: order by real path first, falling back to the virtual path on ties
	sort.SliceStable(entries, func(a, b int) bool {
		left, right := entries[a].Location, entries[b].Location
		if left.RealPath != right.RealPath {
			return left.RealPath < right.RealPath
		}
		return left.VirtualPath < right.VirtualPath
	})

	return entries
}

View File

@ -161,15 +161,6 @@
"configPath": "",
"output": "",
"quiet": false,
"log": {
"structured": false,
"level": "",
"file-location": ""
},
"dev": {
"profile-cpu": false,
"profile-mem": false
},
"check-for-app-update": false,
"anchore": {
"host": "",
@ -177,6 +168,15 @@
"dockerfile": "",
"overwrite-existing-image": false
},
"dev": {
"profile-cpu": false,
"profile-mem": false
},
"log": {
"structured": false,
"level": "",
"file-location": ""
},
"package": {
"cataloger": {
"enabled": false,
@ -192,6 +192,12 @@
"sha256"
]
},
"file-classification": {
"cataloger": {
"enabled": false,
"scope": ""
}
},
"secrets": {
"cataloger": {
"enabled": false,

45
internal/regex_helpers.go Normal file
View File

@ -0,0 +1,45 @@
package internal
import "regexp"
// MatchNamedCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
// This is only for the first match in the regex that has at least one non-empty named group. Callers shouldn't be
// providing regexes with multiple capture groups with the same name.
//
// The result is nil when the expression does not match at all or has no named capture groups. When every match
// only produced empty values, the map holds the (empty) values of the last match.
func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string {
	// note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example:
	// input: "cool something to match against" pattern: `((?P<name>match) (?P<version>against))?`. Since the pattern is
	// encapsulated in an optional capture group, there will be results for each character, but the results will match
	// on nothing. The only "true" match will be at the end ("match against").
	allMatches := regEx.FindAllStringSubmatch(content, -1)

	// the group names do not change between matches, so look them up once
	groupNames := regEx.SubexpNames()

	var results map[string]string
	for _, match := range allMatches {
		// fill a candidate results map with named capture group results, accepting empty values, but not groups with
		// no names
		for nameIdx, name := range groupNames {
			// the bound must be inclusive of len(match): match[len(match)] does not exist
			if nameIdx >= len(match) || len(name) == 0 {
				continue
			}
			if results == nil {
				results = make(map[string]string)
			}
			results[name] = match[nameIdx]
		}
		// note: since we are looking for the first best potential match we should stop when we find the first one
		// with non-empty results.
		if !isEmptyMap(results) {
			break
		}
	}
	return results
}

// isEmptyMap reports whether the map carries no information: it is nil, has no entries, or every value is the
// empty string.
func isEmptyMap(m map[string]string) bool {
	// ranging over a nil or empty map performs no iterations, so those cases fall through to "empty"
	for _, value := range m {
		if value != "" {
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,70 @@
package internal
import (
"regexp"
"testing"
"github.com/stretchr/testify/assert"
)
// TestMatchCaptureGroups exercises MatchNamedCaptureGroups with plain, repeated, nested, and optional capture
// groups, checking that only the first non-empty match is reported.
func TestMatchCaptureGroups(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		pattern  string
		expected map[string]string
	}{
		{
			name:    "go-case",
			input:   "match this thing",
			pattern: `(?P<name>match).*(?P<version>thing)`,
			expected: map[string]string{
				"name":    "match",
				"version": "thing",
			},
		},
		{
			name:    "only matches the first instance",
			input:   "match this thing batch another think",
			pattern: `(?P<name>[mb]atch).*?(?P<version>thin[gk])`,
			expected: map[string]string{
				"name":    "match",
				"version": "thing",
			},
		},
		{
			name:    "nested capture groups",
			input:   "cool something to match against",
			pattern: `((?P<name>match) (?P<version>against))`,
			expected: map[string]string{
				"name":    "match",
				"version": "against",
			},
		},
		{
			name:    "nested optional capture groups",
			input:   "cool something to match against",
			pattern: `((?P<name>match) (?P<version>against))?`,
			expected: map[string]string{
				"name":    "match",
				"version": "against",
			},
		},
		{
			name:    "nested optional capture groups with larger match",
			input:   "cool something to match against match never",
			pattern: `.*?((?P<name>match) (?P<version>(against|never)))?`,
			expected: map[string]string{
				"name":    "match",
				"version": "against",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			compiled := regexp.MustCompile(tc.pattern)
			got := MatchNamedCaptureGroups(compiled, tc.input)
			assert.Equal(t, tc.expected, got)
		})
	}
}

View File

@ -0,0 +1,935 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Document",
"definitions": {
"ApkFileRecord": {
"required": [
"path"
],
"properties": {
"path": {
"type": "string"
},
"ownerUid": {
"type": "string"
},
"ownerGid": {
"type": "string"
},
"permissions": {
"type": "string"
},
"checksum": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"ApkMetadata": {
"required": [
"package",
"originPackage",
"maintainer",
"version",
"license",
"architecture",
"url",
"description",
"size",
"installedSize",
"pullDependencies",
"pullChecksum",
"gitCommitOfApkPort",
"files"
],
"properties": {
"package": {
"type": "string"
},
"originPackage": {
"type": "string"
},
"maintainer": {
"type": "string"
},
"version": {
"type": "string"
},
"license": {
"type": "string"
},
"architecture": {
"type": "string"
},
"url": {
"type": "string"
},
"description": {
"type": "string"
},
"size": {
"type": "integer"
},
"installedSize": {
"type": "integer"
},
"pullDependencies": {
"type": "string"
},
"pullChecksum": {
"type": "string"
},
"gitCommitOfApkPort": {
"type": "string"
},
"files": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/ApkFileRecord"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"CargoPackageMetadata": {
"required": [
"name",
"version",
"source",
"checksum",
"dependencies"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"source": {
"type": "string"
},
"checksum": {
"type": "string"
},
"dependencies": {
"items": {
"type": "string"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"Classification": {
"required": [
"class",
"metadata"
],
"properties": {
"class": {
"type": "string"
},
"metadata": {
"patternProperties": {
".*": {
"type": "string"
}
},
"type": "object"
}
},
"additionalProperties": true,
"type": "object"
},
"Descriptor": {
"required": [
"name",
"version"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"configuration": {
"additionalProperties": true
}
},
"additionalProperties": true,
"type": "object"
},
"Digest": {
"required": [
"algorithm",
"value"
],
"properties": {
"algorithm": {
"type": "string"
},
"value": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"Distribution": {
"required": [
"name",
"version",
"idLike"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"idLike": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"Document": {
"required": [
"artifacts",
"artifactRelationships",
"source",
"distro",
"descriptor",
"schema"
],
"properties": {
"fileClassifications": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/FileClassifications"
},
"type": "array"
},
"fileMetadata": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/FileMetadata"
},
"type": "array"
},
"secrets": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Secrets"
},
"type": "array"
},
"artifacts": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Package"
},
"type": "array"
},
"artifactRelationships": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Relationship"
},
"type": "array"
},
"source": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Source"
},
"distro": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Distribution"
},
"descriptor": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Descriptor"
},
"schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Schema"
},
"artifacts.metadata": {
"anyOf": [
{
"type": "null"
},
{
"$ref": "#/definitions/ApkMetadata"
},
{
"$ref": "#/definitions/CargoPackageMetadata"
},
{
"$ref": "#/definitions/DpkgMetadata"
},
{
"$ref": "#/definitions/GemMetadata"
},
{
"$ref": "#/definitions/JavaMetadata"
},
{
"$ref": "#/definitions/NpmPackageJSONMetadata"
},
{
"$ref": "#/definitions/PythonPackageMetadata"
},
{
"$ref": "#/definitions/RpmdbMetadata"
}
]
}
},
"additionalProperties": true,
"type": "object"
},
"DpkgFileRecord": {
"required": [
"path",
"md5"
],
"properties": {
"path": {
"type": "string"
},
"md5": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"DpkgMetadata": {
"required": [
"package",
"source",
"version",
"sourceVersion",
"architecture",
"maintainer",
"installedSize",
"files"
],
"properties": {
"package": {
"type": "string"
},
"source": {
"type": "string"
},
"version": {
"type": "string"
},
"sourceVersion": {
"type": "string"
},
"architecture": {
"type": "string"
},
"maintainer": {
"type": "string"
},
"installedSize": {
"type": "integer"
},
"files": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/DpkgFileRecord"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"FileClassifications": {
"required": [
"location",
"classification"
],
"properties": {
"location": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Location"
},
"classification": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Classification"
}
},
"additionalProperties": true,
"type": "object"
},
"FileMetadata": {
"required": [
"location",
"metadata"
],
"properties": {
"location": {
"$ref": "#/definitions/Location"
},
"metadata": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/FileMetadataEntry"
}
},
"additionalProperties": true,
"type": "object"
},
"FileMetadataEntry": {
"required": [
"mode",
"type",
"userID",
"groupID"
],
"properties": {
"mode": {
"type": "integer"
},
"type": {
"type": "string"
},
"linkDestination": {
"type": "string"
},
"userID": {
"type": "integer"
},
"groupID": {
"type": "integer"
},
"digests": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/Digest"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"GemMetadata": {
"required": [
"name",
"version"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"files": {
"items": {
"type": "string"
},
"type": "array"
},
"authors": {
"items": {
"type": "string"
},
"type": "array"
},
"licenses": {
"items": {
"type": "string"
},
"type": "array"
},
"homepage": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"JavaManifest": {
"properties": {
"main": {
"patternProperties": {
".*": {
"type": "string"
}
},
"type": "object"
},
"namedSections": {
"patternProperties": {
".*": {
"patternProperties": {
".*": {
"type": "string"
}
},
"type": "object"
}
},
"type": "object"
}
},
"additionalProperties": true,
"type": "object"
},
"JavaMetadata": {
"required": [
"virtualPath"
],
"properties": {
"virtualPath": {
"type": "string"
},
"manifest": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/JavaManifest"
},
"pomProperties": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/PomProperties"
}
},
"additionalProperties": true,
"type": "object"
},
"Location": {
"required": [
"path"
],
"properties": {
"path": {
"type": "string"
},
"layerID": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"NpmPackageJSONMetadata": {
"required": [
"author",
"licenses",
"homepage",
"description",
"url"
],
"properties": {
"files": {
"items": {
"type": "string"
},
"type": "array"
},
"author": {
"type": "string"
},
"licenses": {
"items": {
"type": "string"
},
"type": "array"
},
"homepage": {
"type": "string"
},
"description": {
"type": "string"
},
"url": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"Package": {
"required": [
"id",
"name",
"version",
"type",
"foundBy",
"locations",
"licenses",
"language",
"cpes",
"purl",
"metadataType",
"metadata"
],
"properties": {
"id": {
"type": "string"
},
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"type": {
"type": "string"
},
"foundBy": {
"type": "string"
},
"locations": {
"items": {
"$ref": "#/definitions/Location"
},
"type": "array"
},
"licenses": {
"items": {
"type": "string"
},
"type": "array"
},
"language": {
"type": "string"
},
"cpes": {
"items": {
"type": "string"
},
"type": "array"
},
"purl": {
"type": "string"
},
"metadataType": {
"type": "string"
},
"metadata": {
"additionalProperties": true
}
},
"additionalProperties": true,
"type": "object"
},
"PomProperties": {
"required": [
"path",
"name",
"groupId",
"artifactId",
"version",
"extraFields"
],
"properties": {
"path": {
"type": "string"
},
"name": {
"type": "string"
},
"groupId": {
"type": "string"
},
"artifactId": {
"type": "string"
},
"version": {
"type": "string"
},
"extraFields": {
"patternProperties": {
".*": {
"type": "string"
}
},
"type": "object"
}
},
"additionalProperties": true,
"type": "object"
},
"PythonFileDigest": {
"required": [
"algorithm",
"value"
],
"properties": {
"algorithm": {
"type": "string"
},
"value": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"PythonFileRecord": {
"required": [
"path"
],
"properties": {
"path": {
"type": "string"
},
"digest": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/PythonFileDigest"
},
"size": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"PythonPackageMetadata": {
"required": [
"name",
"version",
"license",
"author",
"authorEmail",
"platform",
"sitePackagesRootPath"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"license": {
"type": "string"
},
"author": {
"type": "string"
},
"authorEmail": {
"type": "string"
},
"platform": {
"type": "string"
},
"files": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/PythonFileRecord"
},
"type": "array"
},
"sitePackagesRootPath": {
"type": "string"
},
"topLevelPackages": {
"items": {
"type": "string"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"Relationship": {
"required": [
"parent",
"child",
"type",
"metadata"
],
"properties": {
"parent": {
"type": "string"
},
"child": {
"type": "string"
},
"type": {
"type": "string"
},
"metadata": {
"additionalProperties": true
}
},
"additionalProperties": true,
"type": "object"
},
"RpmdbFileRecord": {
"required": [
"path",
"mode",
"size",
"sha256"
],
"properties": {
"path": {
"type": "string"
},
"mode": {
"type": "integer"
},
"size": {
"type": "integer"
},
"sha256": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"RpmdbMetadata": {
"required": [
"name",
"version",
"epoch",
"architecture",
"release",
"sourceRpm",
"size",
"license",
"vendor",
"files"
],
"properties": {
"name": {
"type": "string"
},
"version": {
"type": "string"
},
"epoch": {
"type": "integer"
},
"architecture": {
"type": "string"
},
"release": {
"type": "string"
},
"sourceRpm": {
"type": "string"
},
"size": {
"type": "integer"
},
"license": {
"type": "string"
},
"vendor": {
"type": "string"
},
"files": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/RpmdbFileRecord"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"Schema": {
"required": [
"version",
"url"
],
"properties": {
"version": {
"type": "string"
},
"url": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"SearchResult": {
"required": [
"classification",
"lineNumber",
"lineOffset",
"seekPosition",
"length"
],
"properties": {
"classification": {
"type": "string"
},
"lineNumber": {
"type": "integer"
},
"lineOffset": {
"type": "integer"
},
"seekPosition": {
"type": "integer"
},
"length": {
"type": "integer"
},
"value": {
"type": "string"
}
},
"additionalProperties": true,
"type": "object"
},
"Secrets": {
"required": [
"location",
"secrets"
],
"properties": {
"location": {
"$ref": "#/definitions/Location"
},
"secrets": {
"items": {
"$schema": "http://json-schema.org/draft-04/schema#",
"$ref": "#/definitions/SearchResult"
},
"type": "array"
}
},
"additionalProperties": true,
"type": "object"
},
"Source": {
"required": [
"type",
"target"
],
"properties": {
"type": {
"type": "string"
},
"target": {
"additionalProperties": true
}
},
"additionalProperties": true,
"type": "object"
}
}
}

View File

@ -0,0 +1,37 @@
package file
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// ClassificationCataloger applies a fixed set of classifiers to the files exposed by a file resolver.
type ClassificationCataloger struct {
// classifiers are tried against every location, in slice order.
classifiers []Classifier
}
// NewClassificationCataloger returns a cataloger that will apply the given classifiers. The classifier slice is
// stored as given (not copied); the returned error is currently always nil.
func NewClassificationCataloger(classifiers []Classifier) (*ClassificationCataloger, error) {
	cataloger := &ClassificationCataloger{classifiers: classifiers}
	return cataloger, nil
}
// Catalog runs every configured classifier against every location provided by the resolver and returns the
// classifications found, keyed by location. Locations without any classification are absent from the map.
// The first classifier error aborts cataloging and is returned with a nil map.
func (c *ClassificationCataloger) Catalog(resolver source.FileResolver) (map[source.Location][]Classification, error) {
	var total int
	found := make(map[source.Location][]Classification)

	for loc := range resolver.AllLocations() {
		for _, classifier := range c.classifiers {
			classification, err := classifier.Classify(resolver, loc)
			if err != nil {
				return nil, err
			}
			// a nil classification means this classifier does not apply to the location
			if classification == nil {
				continue
			}
			found[loc] = append(found[loc], *classification)
			total++
		}
	}

	log.Debugf("classification cataloger discovered %d results", total)
	return found, nil
}

View File

@ -0,0 +1,145 @@
package file
import (
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestClassifierCataloger_DefaultClassifiers_PositiveCases catalogs the positive fixture directory with the
// default classifiers and checks that each fixture file yields exactly the expected classification.
func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
	cases := []struct {
		name        string
		fixtureDir  string
		location    string
		expected    []Classification
		expectedErr func(assert.TestingT, error, ...interface{}) bool
	}{
		{
			name:       "positive-libpython3.7.so",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/libpython3.7.so",
			expected: []Classification{
				{
					Class: "python-binary",
					Metadata: map[string]string{
						"version": "3.7.4a-vZ9",
					},
				},
			},
			expectedErr: assert.NoError,
		},
		{
			name:       "positive-python3.6",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/python3.6",
			expected: []Classification{
				{
					Class: "python-binary",
					Metadata: map[string]string{
						"version": "3.6.3a-vZ9",
					},
				},
			},
			expectedErr: assert.NoError,
		},
		{
			name:       "positive-patchlevel.h",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/patchlevel.h",
			expected: []Classification{
				{
					Class: "cpython-source",
					Metadata: map[string]string{
						"version": "3.9-aZ5",
					},
				},
			},
			expectedErr: assert.NoError,
		},
		{
			name:       "positive-go",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/go",
			expected: []Classification{
				{
					Class: "go-binary",
					Metadata: map[string]string{
						"version": "1.14",
					},
				},
			},
			expectedErr: assert.NoError,
		},
		{
			name:       "positive-go-hint",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/VERSION",
			expected: []Classification{
				{
					Class: "go-binary-hint",
					Metadata: map[string]string{
						"version": "1.15",
					},
				},
			},
			expectedErr: assert.NoError,
		},
		{
			name:       "positive-busybox",
			fixtureDir: "test-fixtures/classifiers/positive",
			location:   "test-fixtures/classifiers/positive/busybox",
			expected: []Classification{
				{
					Class: "busybox-binary",
					Metadata: map[string]string{
						"version": "3.33.3",
					},
				},
			},
			expectedErr: assert.NoError,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cataloger, err := NewClassificationCataloger(DefaultClassifiers)
			tc.expectedErr(t, err)

			fixtureSrc, err := source.NewFromDirectory(tc.fixtureDir)
			tc.expectedErr(t, err)

			resolver, err := fixtureSrc.FileResolver(source.SquashedScope)
			tc.expectedErr(t, err)

			results, err := cataloger.Catalog(resolver)
			tc.expectedErr(t, err)

			// the fixture file itself must have been classified
			got, ok := results[source.NewLocation(tc.location)]
			if !ok {
				t.Fatalf("could not find test location=%q", tc.location)
			}

			assert.Equal(t, tc.expected, got)
		})
	}
}
// TestClassifierCataloger_DefaultClassifiers_NegativeCases catalogs the negative fixture directory with the
// default classifiers and checks that nothing is classified.
func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) {
	cataloger, err := NewClassificationCataloger(DefaultClassifiers)
	assert.NoError(t, err)

	negativeSrc, err := source.NewFromDirectory("test-fixtures/classifiers/negative")
	assert.NoError(t, err)

	resolver, err := negativeSrc.FileResolver(source.SquashedScope)
	assert.NoError(t, err)

	found, err := cataloger.Catalog(resolver)
	assert.NoError(t, err)

	// no location in the negative fixtures may produce a classification
	assert.Equal(t, 0, len(found))
}

141
syft/file/classifier.go Normal file
View File

@ -0,0 +1,141 @@
package file
import (
"bytes"
"fmt"
"io/ioutil"
"regexp"
"text/template"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/source"
)
var DefaultClassifiers = []Classifier{
{
Class: "python-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)python(?P<version>[0-9]+\.[0-9]+)$`),
regexp.MustCompile(`(.*/|^)libpython(?P<version>[0-9]+\.[0-9]+).so.*$`),
},
EvidencePatternTemplates: []string{
`(?m)(?P<version>{{ .version }}\.[0-9]+[-_a-zA-Z0-9]*)`,
},
},
{
Class: "cpython-source",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)patchlevel.h$`),
},
EvidencePatternTemplates: []string{
`(?m)#define\s+PY_VERSION\s+"?(?P<version>[0-9\.\-_a-zA-Z]+)"?`,
},
},
{
Class: "go-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)go$`),
},
EvidencePatternTemplates: []string{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "go-binary-hint",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)VERSION$`),
},
EvidencePatternTemplates: []string{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "busybox-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)busybox$`),
},
EvidencePatternTemplates: []string{
`(?m)BusyBox\s+v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
},
},
}
// Classifier describes how to recognize a single class of file: which paths make a file a
// candidate and which content patterns confirm it.
type Classifier struct {
// Class is the label copied into Classification.Class when this classifier matches.
Class string
// FilepathPatterns select candidate files; a file is considered when any one pattern matches
// the real or virtual path of its location. Named capture groups from the matching pattern
// are made available to the evidence templates.
FilepathPatterns []*regexp.Regexp
// EvidencePatternTemplates are text/template strings that are rendered with the named groups
// captured from the filepath, compiled as regular expressions, and matched against the file
// contents.
EvidencePatternTemplates []string
}
// Classification is the result of a Classifier matching a file.
type Classification struct {
// Class is the Class of the Classifier that produced this result.
Class string `json:"class"`
// Metadata holds the named capture group values collected from the evidence patterns that
// matched the file contents.
Metadata map[string]string `json:"metadata"`
}
// Classify checks the file at the given location against this classifier.
//
// The location's path must match one of FilepathPatterns, otherwise (nil, nil) is returned
// without reading the file. When the path matches, the file contents are read in full and each
// evidence template is rendered with the named groups captured from the path, compiled as a
// regular expression, and matched against the contents. The named groups of every matching
// evidence pattern are merged into a single Classification; on duplicate keys the value from the
// later template wins. If no evidence pattern matches, (nil, nil) is returned.
//
// An error is returned when the contents cannot be fetched or read, or when a template cannot be
// parsed, rendered, or compiled into a valid regular expression.
func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
	doesFilepathMatch, filepathNamedGroupValues := filepathMatches(c.FilepathPatterns, location)
	if !doesFilepathMatch {
		return nil, nil
	}

	contentReader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer contentReader.Close()

	// TODO: there is room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader.
	contents, err := ioutil.ReadAll(contentReader)
	if err != nil {
		return nil, err
	}

	var result *Classification
	for _, patternTemplate := range c.EvidencePatternTemplates {
		tmpl, err := template.New("").Parse(patternTemplate)
		if err != nil {
			return nil, fmt.Errorf("unable to parse classifier template=%q : %w", patternTemplate, err)
		}

		// the pattern depends on values captured from the filepath, so it can only be
		// rendered and compiled once the path match is known
		patternBuf := &bytes.Buffer{}
		err = tmpl.Execute(patternBuf, filepathNamedGroupValues)
		if err != nil {
			return nil, fmt.Errorf("unable to render template: %w", err)
		}

		pattern, err := regexp.Compile(patternBuf.String())
		if err != nil {
			return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err)
		}

		if !pattern.Match(contents) {
			continue
		}

		matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))

		if result == nil {
			// the first matching pattern establishes the classification
			result = &Classification{
				Class:    c.Class,
				Metadata: matchMetadata,
			}
			continue
		}

		for key, value := range matchMetadata {
			// the metadata taken from an earlier match may be a nil map (the helper appears
			// to return nil when a pattern has no named capture groups); writing to a nil
			// map panics, so allocate one before merging the first entry
			if result.Metadata == nil {
				result.Metadata = make(map[string]string, len(matchMetadata))
			}
			result.Metadata[key] = value
		}
	}
	return result, nil
}
// filepathMatches reports whether any of the given patterns matches the location. The real path
// is tried before the virtual path, and an empty path is never tested. On the first match the
// named capture group values for that pattern and path are returned alongside true; when nothing
// matches the result is (false, nil).
func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
	candidates := [...]string{location.RealPath, location.VirtualPath}
	for i := range candidates {
		candidate := candidates[i]
		if len(candidate) == 0 {
			continue
		}
		for _, re := range patterns {
			if !re.MatchString(candidate) {
				continue
			}
			return true, internal.MatchNamedCaptureGroups(re, candidate)
		}
	}
	return false, nil
}

View File

@ -0,0 +1,90 @@
package file
import (
"regexp"
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestFilepathMatches checks filepathMatches against real and virtual paths using unanchored,
// full-path, and anchored patterns, as well as an empty location.
func TestFilepathMatches(t *testing.T) {
	type testCase struct {
		name            string
		location        source.Location
		patterns        []string
		expectedMatches bool
		// left unset in every case below, so a nil map is expected from filepathMatches
		expectedNamedGroups map[string]string
	}

	cases := []testCase{
		{
			name:            "simple-filename-match",
			location:        source.Location{RealPath: "python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "filepath-match",
			location:        source.Location{RealPath: "/usr/bin/python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "virtual-filepath-match",
			location:        source.Location{VirtualPath: "/usr/bin/python2.7"},
			patterns:        []string{`python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			name:            "full-filepath-match",
			location:        source.Location{VirtualPath: "/usr/bin/python2.7"},
			patterns:        []string{`.*/bin/python([0-9]+\.[0-9]+)$`},
			expectedMatches: true,
		},
		{
			// the leading anchor prevents a match against a path with a directory prefix
			name:            "anchored-filename-match-FAILS",
			location:        source.Location{RealPath: "/usr/bin/python2.7"},
			patterns:        []string{`^python([0-9]+\.[0-9]+)$`},
			expectedMatches: false,
		},
		{
			// a location with neither a real nor a virtual path
			name:            "empty-filename-match-FAILS",
			location:        source.Location{},
			patterns:        []string{`^python([0-9]+\.[0-9]+)$`},
			expectedMatches: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var compiled []*regexp.Regexp
			for idx := range tc.patterns {
				compiled = append(compiled, regexp.MustCompile(tc.patterns[idx]))
			}

			matched, namedGroups := filepathMatches(compiled, tc.location)

			assert.Equal(t, tc.expectedMatches, matched)
			assert.Equal(t, tc.expectedNamedGroups, namedGroups)
		})
	}
}

View File

@ -0,0 +1 @@
!libpython2.7.so

View File

@ -0,0 +1 @@
another bad binary

View File

@ -0,0 +1 @@
a bad go binary

View File

@ -0,0 +1,3 @@
# note: this should NOT match
DO NOT DETECT

View File

@ -0,0 +1,3 @@
# note: this should NOT match
just some noise

View File

@ -0,0 +1 @@
!libpython3.7.so

View File

@ -0,0 +1 @@
go1.15-beta2

View File

@ -0,0 +1,3 @@
# note: this SHOULD match as busybox 3.33.3
noise!BusyBox v3.33.3!noise

View File

@ -0,0 +1 @@
go1.14

View File

@ -0,0 +1,2 @@
# note: this SHOULD match as python 3.7
noise3.7.4a-vZ9!morenoise

View File

@ -0,0 +1,7 @@
# note: this SHOULD match as python 3.9
some source code...
#define PY_VERSION 3.9-aZ5
more source!

View File

@ -0,0 +1,3 @@
# note: this SHOULD match as python 3.6
noise3.6.3a-vZ9!morenoise

View File

@ -21,7 +21,7 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
for scanner.Scan() {
line := scanner.Text()
matchesByGroup := internal.MatchCaptureGroups(licensePattern, line)
matchesByGroup := internal.MatchNamedCaptureGroups(licensePattern, line)
if len(matchesByGroup) > 0 {
candidate, ok := matchesByGroup["license"]
if !ok {

View File

@ -145,7 +145,7 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
// of the "<name>" form, then return name and nil
func extractSourceVersion(source string) (string, string) {
// special handling for the Source field since it has formatted data
match := internal.MatchCaptureGroups(sourceRegexp, source)
match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
return match["name"], match["version"]
}

View File

@ -63,7 +63,7 @@ func (a *Author) UnmarshalJSON(b []byte) error {
}
} else {
// parse out "name <email> (url)" into an Author struct
fields = internal.MatchCaptureGroups(authorPattern, authorStr)
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
}
// translate the map into a structure

View File

@ -77,7 +77,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
}
for field, pattern := range patterns {
matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil {
fields[field] = postProcessor(value)