add file contents cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-04-06 16:41:43 -04:00
parent 9ad786d608
commit c1551a03c5
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
10 changed files with 261 additions and 2 deletions

View File

@ -21,6 +21,7 @@ func powerUserTasks() ([]powerUserTask, error) {
catalogFileDigestsTask,
catalogSecretsTask,
catalogFileClassificationsTask,
catalogContentsTask,
}
for _, generator := range generators {
@ -185,3 +186,30 @@ func catalogFileClassificationsTask() (powerUserTask, error) {
return task, nil
}
// catalogContentsTask builds the power-user task that records file contents in the JSON document config.
// It returns a nil task (and nil error) when the contents cataloger is disabled in the application config.
func catalogContentsTask() (powerUserTask, error) {
	if enabled := appConfig.Contents.Cataloger.Enabled; !enabled {
		return nil, nil
	}

	cataloger, err := file.NewContentsCataloger(appConfig.Contents.Globs, appConfig.Contents.SkipFilesAboveSize)
	if err != nil {
		return nil, err
	}

	return func(results *poweruser.JSONDocumentConfig, src source.Source) error {
		// the resolver is scoped according to the configured cataloger scope option
		resolver, err := src.FileResolver(appConfig.Contents.Cataloger.ScopeOpt)
		if err != nil {
			return err
		}

		contents, err := cataloger.Catalog(resolver)
		if err != nil {
			return err
		}

		results.FileContents = contents
		return nil
	}, nil
}

View File

@ -38,6 +38,7 @@ type Application struct {
Package packages `yaml:"package" json:"package" mapstructure:"package"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
Contents contents `yaml:"contents" json:"contents" mapstructure:"contents"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
}

View File

@ -0,0 +1,24 @@
package config
import (
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
// contents is the application configuration section (key "contents") for the file contents cataloger.
type contents struct {
// Cataloger holds the nested "cataloger" options; defaults are registered for its "enabled" and "scope" keys.
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
// SkipFilesAboveSize is a size threshold handed to the contents cataloger (defaults to 1 MB).
SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
// Globs are the glob patterns handed to the contents cataloger to select files (defaults to none).
Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"`
}
// loadDefaultValues registers the default value for every "contents.*" configuration key on the given viper
// instance.
func (cfg contents) loadDefaultValues(v *viper.Viper) {
	defaults := []struct {
		key   string
		value interface{}
	}{
		{key: "contents.cataloger.enabled", value: true},
		{key: "contents.cataloger.scope", value: source.SquashedScope},
		{key: "contents.skip-files-above-size", value: 1 * file.MB},
		{key: "contents.globs", value: []string{}},
	}

	for _, d := range defaults {
		v.SetDefault(d.key, d.value)
	}
}
// parseConfigValues finalizes the section after loading by delegating to the nested cataloger options.
func (cfg *contents) parseConfigValues() error {
	if err := cfg.Cataloger.parseConfigValues(); err != nil {
		return err
	}
	return nil
}

View File

@ -10,6 +10,7 @@ type JSONDocument struct {
// require these fields. As an accepted rule in this repo all collections should still be initialized in the
// context of being used in a JSON document.
FileClassifications []JSONFileClassifications `json:"fileClassifications,omitempty"` // note: must have omitempty
FileContents []JSONFileContents `json:"fileContents,omitempty"` // note: must have omitempty
FileMetadata []JSONFileMetadata `json:"fileMetadata,omitempty"` // note: must have omitempty
Secrets []JSONSecrets `json:"secrets,omitempty"` // note: must have omitempty
packages.JSONDocument
@ -29,6 +30,7 @@ func NewJSONDocument(config JSONDocumentConfig) (JSONDocument, error) {
return JSONDocument{
FileClassifications: NewJSONFileClassifications(config.FileClassifications),
FileContents: NewJSONFileContents(config.FileContents),
FileMetadata: fileMetadata,
Secrets: NewJSONSecrets(config.Secrets),
JSONDocument: pkgsDoc,

View File

@ -14,6 +14,7 @@ type JSONDocumentConfig struct {
FileMetadata map[source.Location]source.FileMetadata
FileDigests map[source.Location][]file.Digest
FileClassifications map[source.Location][]file.Classification
FileContents map[source.Location]string
Secrets map[source.Location][]file.SearchResult
Distro *distro.Distro
SourceMetadata source.Metadata

View File

@ -0,0 +1,31 @@
package poweruser
import (
"sort"
"github.com/anchore/syft/syft/source"
)
// JSONFileContents is the JSON document entry pairing a file location with that file's cataloged contents.
type JSONFileContents struct {
// Location identifies the file the contents were read from.
Location source.Location `json:"location"`
// Contents is the contents string recorded for the location, copied verbatim from the catalog results.
Contents string `json:"contents"`
}
// NewJSONFileContents converts a location-to-contents mapping into a slice of JSONFileContents ordered by real
// path and then by virtual path. The returned slice is always initialized (never nil), even for empty or nil
// input.
func NewJSONFileContents(data map[source.Location]string) []JSONFileContents {
	// the final length is known up front, so allocate once instead of growing on every append
	results := make([]JSONFileContents, 0, len(data))
	for location, contents := range data {
		results = append(results, JSONFileContents{
			Location: location,
			Contents: contents,
		})
	}

	// map iteration order is random: sort by real path then virtual path to ensure the result is stable
	// across multiple runs
	sort.SliceStable(results, func(i, j int) bool {
		a, b := results[i].Location, results[j].Location
		if a.RealPath != b.RealPath {
			return a.RealPath < b.RealPath
		}
		return a.VirtualPath < b.VirtualPath
	})
	return results
}

View File

@ -0,0 +1,68 @@
package file
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// ContentsCataloger captures the base64-encoded contents of files selected by glob patterns.
type ContentsCataloger struct {
// globs are the patterns passed to the resolver to select which files to catalog.
globs []string
// skipFilesAboveSize, when greater than zero, excludes files whose reported metadata size exceeds it.
skipFilesAboveSize int64
}
// NewContentsCataloger creates a ContentsCataloger that selects files with the given glob patterns and applies
// the given size threshold. The returned error is currently always nil.
func NewContentsCataloger(globs []string, skipFilesAboveSize int64) (*ContentsCataloger, error) {
	cataloger := ContentsCataloger{
		globs:              globs,
		skipFilesAboveSize: skipFilesAboveSize,
	}
	return &cataloger, nil
}
// Catalog resolves every file matching the configured globs and returns a mapping of each location to the
// base64 encoding of that file's contents. Files whose reported size exceeds skipFilesAboveSize are left out
// (a threshold of zero or less disables the size check). Any resolver or read failure aborts the whole catalog
// and is returned as the error.
func (i *ContentsCataloger) Catalog(resolver source.FileResolver) (map[source.Location]string, error) {
	locations, err := resolver.FilesByGlob(i.globs...)
	if err != nil {
		return nil, err
	}

	results := make(map[source.Location]string)
	for _, location := range locations {
		// overlapping globs may report the same location more than once; read and encode each file only once
		if _, seen := results[location]; seen {
			continue
		}

		metadata, err := resolver.FileMetadataByLocation(location)
		if err != nil {
			return nil, err
		}

		if i.skipFilesAboveSize > 0 && metadata.Size > i.skipFilesAboveSize {
			continue
		}

		result, err := i.catalogLocation(resolver, location)
		if err != nil {
			return nil, err
		}
		results[location] = result
	}

	log.Debugf("file contents cataloger processed %d files", len(results))
	return results, nil
}
// catalogLocation reads the file at the given location through the resolver and returns its full contents
// encoded as standard (padded) base64. An error is returned if the contents cannot be opened, read, or
// encoded.
func (i *ContentsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) (string, error) {
	contentReader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return "", err
	}
	defer contentReader.Close()

	buf := &bytes.Buffer{}
	encoder := base64.NewEncoder(base64.StdEncoding, buf)
	if _, err = io.Copy(encoder, contentReader); err != nil {
		return "", fmt.Errorf("unable to observe contents of %+v: %+v", location.RealPath, err)
	}

	// the encoder works in 3-byte groups and holds back any trailing partial group until it is closed; without
	// this the output is truncated for any file whose size is not a multiple of 3
	if err = encoder.Close(); err != nil {
		return "", fmt.Errorf("unable to observe contents of %+v: %+v", location.RealPath, err)
	}

	return buf.String(), nil
}

View File

@ -0,0 +1,89 @@
package file
import (
"testing"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
)
// TestContentsCataloger exercises ContentsCataloger.Catalog against a mock resolver, covering glob selection
// (multiple patterns, no patterns, recursive and single-directory patterns) and the size threshold.
func TestContentsCataloger(t *testing.T) {
	const (
		lastPath    = "test-fixtures/last/path.txt"
		anotherPath = "test-fixtures/another-path.txt"
		aPath       = "test-fixtures/a-path.txt"

		// expected base64 encodings of each fixture's contents
		lastPathEncoded    = "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh"
		anotherPathEncoded = "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh"
		aPathEncoded       = "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh"
	)

	allFiles := []string{lastPath, anotherPath, aPath}

	type testCase struct {
		name       string
		globs      []string
		maxSize    int64
		files      []string
		expected   map[source.Location]string
		catalogErr bool
	}

	cases := []testCase{
		{
			name:  "multi-pattern",
			globs: []string{"test-fixtures/last/*.txt", "test-fixtures/*.txt"},
			files: allFiles,
			expected: map[source.Location]string{
				source.NewLocation(lastPath):    lastPathEncoded,
				source.NewLocation(anotherPath): anotherPathEncoded,
				source.NewLocation(aPath):       aPathEncoded,
			},
		},
		{
			name:     "no-patterns",
			globs:    []string{},
			files:    allFiles,
			expected: map[source.Location]string{},
		},
		{
			name:  "all-txt",
			globs: []string{"**/*.txt"},
			files: allFiles,
			expected: map[source.Location]string{
				source.NewLocation(lastPath):    lastPathEncoded,
				source.NewLocation(anotherPath): anotherPathEncoded,
				source.NewLocation(aPath):       aPathEncoded,
			},
		},
		{
			name:  "subpath",
			globs: []string{"test-fixtures/*.txt"},
			files: allFiles,
			expected: map[source.Location]string{
				source.NewLocation(anotherPath): anotherPathEncoded,
				source.NewLocation(aPath):       aPathEncoded,
			},
		},
		{
			// the expected encoding for another-path.txt decodes to 45 bytes, above the 42 byte limit
			name:    "size-filter",
			maxSize: 42,
			globs:   []string{"**/*.txt"},
			files:   allFiles,
			expected: map[source.Location]string{
				source.NewLocation(lastPath): lastPathEncoded,
				source.NewLocation(aPath):    aPathEncoded,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cataloger, err := NewContentsCataloger(tc.globs, tc.maxSize)
			if err != nil {
				t.Fatalf("could not create cataloger: %+v", err)
			}

			mockResolver := source.NewMockResolverForPaths(tc.files...)
			actual, err := cataloger.Catalog(mockResolver)

			switch {
			case err != nil && !tc.catalogErr:
				t.Fatalf("could not catalog (but should have been able to): %+v", err)
			case err == nil && tc.catalogErr:
				t.Fatalf("expected catalog error but did not get one")
			case tc.catalogErr && err != nil:
				// the expected failure occurred; there are no results to compare
				return
			}

			assert.Equal(t, tc.expected, actual, "mismatched contents")
		})
	}
}

View File

@ -5,7 +5,7 @@ import (
"io"
"os"
"github.com/anchore/syft/internal/file"
"github.com/bmatcuk/doublestar/v2"
)
var _ FileResolver = (*MockResolver)(nil)
@ -84,7 +84,11 @@ func (r MockResolver) FilesByGlob(patterns ...string) ([]Location, error) {
var results []Location
for _, pattern := range patterns {
for _, location := range r.Locations {
if file.GlobMatch(pattern, location.RealPath) {
matches, err := doublestar.Match(pattern, location.RealPath)
if err != nil {
return nil, err
}
if matches {
results = append(results, location)
}
}

View File

@ -51,6 +51,17 @@ func TestPowerUserCmdFlags(t *testing.T) {
assertSuccessfulReturnCode,
},
},
{
name: "content-cataloger-wired-up",
args: []string{"power-user", "docker-archive:" + getFixtureImage(t, "image-secrets")},
env: map[string]string{
"SYFT_CONTENTS_GLOBS": "/api-key.txt",
},
assertions: []traitAssertion{
assertInOutput(`"contents": "c29tZV9BcEkta0V5ID0gIjEyMzQ1QTdhOTAxYjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MCIK"`), // proof of the content cataloger
assertSuccessfulReturnCode,
},
},
}
for _, test := range tests {