diff --git a/cmd/power_user_tasks.go b/cmd/power_user_tasks.go
index feacb7174..da7cc97ea 100644
--- a/cmd/power_user_tasks.go
+++ b/cmd/power_user_tasks.go
@@ -21,6 +21,7 @@ func powerUserTasks() ([]powerUserTask, error) {
 		catalogFileDigestsTask,
 		catalogSecretsTask,
 		catalogFileClassificationsTask,
+		catalogContentsTask,
 	}
 
 	for _, generator := range generators {
@@ -185,3 +186,32 @@ func catalogFileClassificationsTask() (powerUserTask, error) {
 
 	return task, nil
 }
+
+// catalogContentsTask builds the power-user task that catalogs file contents for the configured globs and stores
+// the result on the JSON document config. It returns a nil task (and nil error) when the cataloger is disabled.
+func catalogContentsTask() (powerUserTask, error) {
+	if !appConfig.Contents.Cataloger.Enabled {
+		return nil, nil
+	}
+
+	contentsCataloger, err := file.NewContentsCataloger(appConfig.Contents.Globs, appConfig.Contents.SkipFilesAboveSize)
+	if err != nil {
+		return nil, err
+	}
+
+	task := func(results *poweruser.JSONDocumentConfig, src source.Source) error {
+		resolver, err := src.FileResolver(appConfig.Contents.Cataloger.ScopeOpt)
+		if err != nil {
+			return err
+		}
+
+		result, err := contentsCataloger.Catalog(resolver)
+		if err != nil {
+			return err
+		}
+		results.FileContents = result
+		return nil
+	}
+
+	return task, nil
+}
diff --git a/internal/config/application.go b/internal/config/application.go
index 3c0f2f112..c78b21abf 100644
--- a/internal/config/application.go
+++ b/internal/config/application.go
@@ -38,6 +38,7 @@ type Application struct {
 	Package            packages           `yaml:"package" json:"package" mapstructure:"package"`
 	FileMetadata       FileMetadata       `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
 	FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
+	Contents           contents           `yaml:"contents" json:"contents" mapstructure:"contents"`
 	Secrets            secrets            `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
 }
 
diff --git a/internal/config/contents.go b/internal/config/contents.go
new file mode 100644
index 000000000..5c660013c
--- /dev/null
+++ b/internal/config/contents.go
@@ -0,0 +1,27 @@
+package config
+
+import (
+	"github.com/anchore/syft/internal/file"
+	"github.com/anchore/syft/syft/source"
+	"github.com/spf13/viper"
+)
+
+// contents holds the application configuration for the file contents cataloger.
+type contents struct {
+	Cataloger          catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
+	SkipFilesAboveSize int64            `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
+	Globs              []string         `yaml:"globs" json:"globs" mapstructure:"globs"`
+}
+
+// loadDefaultValues registers the default value for each "contents.*" configuration key.
+func (cfg contents) loadDefaultValues(v *viper.Viper) {
+	v.SetDefault("contents.cataloger.enabled", true)
+	v.SetDefault("contents.cataloger.scope", source.SquashedScope)
+	v.SetDefault("contents.skip-files-above-size", 1*file.MB)
+	v.SetDefault("contents.globs", []string{})
+}
+
+// parseConfigValues delegates to the parseConfigValues method of the nested cataloger options.
+func (cfg *contents) parseConfigValues() error {
+	return cfg.Cataloger.parseConfigValues()
+}
diff --git a/internal/presenter/poweruser/json_document.go b/internal/presenter/poweruser/json_document.go
index b4e930307..d468bfaf5 100644
--- a/internal/presenter/poweruser/json_document.go
+++ b/internal/presenter/poweruser/json_document.go
@@ -10,6 +10,7 @@ type JSONDocument struct {
 	// require these fields. As an accepted rule in this repo all collections should still be initialized in the
 	// context of being used in a JSON document.
 	FileClassifications []JSONFileClassifications `json:"fileClassifications,omitempty"` // note: must have omitempty
+	FileContents        []JSONFileContents        `json:"fileContents,omitempty"`        // note: must have omitempty
 	FileMetadata        []JSONFileMetadata        `json:"fileMetadata,omitempty"`        // note: must have omitempty
 	Secrets             []JSONSecrets             `json:"secrets,omitempty"`             // note: must have omitempty
 	packages.JSONDocument
@@ -29,6 +30,7 @@ func NewJSONDocument(config JSONDocumentConfig) (JSONDocument, error) {
 
 	return JSONDocument{
 		FileClassifications: NewJSONFileClassifications(config.FileClassifications),
+		FileContents:        NewJSONFileContents(config.FileContents),
 		FileMetadata:        fileMetadata,
 		Secrets:             NewJSONSecrets(config.Secrets),
 		JSONDocument:        pkgsDoc,
diff --git a/internal/presenter/poweruser/json_document_config.go b/internal/presenter/poweruser/json_document_config.go
index 7d9cdffc3..20db5c759 100644
--- a/internal/presenter/poweruser/json_document_config.go
+++ b/internal/presenter/poweruser/json_document_config.go
@@ -14,6 +14,7 @@ type JSONDocumentConfig struct {
 	FileMetadata        map[source.Location]source.FileMetadata
 	FileDigests         map[source.Location][]file.Digest
 	FileClassifications map[source.Location][]file.Classification
+	FileContents        map[source.Location]string
 	Secrets             map[source.Location][]file.SearchResult
 	Distro              *distro.Distro
 	SourceMetadata      source.Metadata
diff --git a/internal/presenter/poweruser/json_file_contents.go b/internal/presenter/poweruser/json_file_contents.go
new file mode 100644
index 000000000..3105a9507
--- /dev/null
+++ b/internal/presenter/poweruser/json_file_contents.go
@@ -0,0 +1,34 @@
+package poweruser
+
+import (
+	"sort"
+
+	"github.com/anchore/syft/syft/source"
+)
+
+// JSONFileContents pairs a file location with the contents string captured for that location.
+type JSONFileContents struct {
+	Location source.Location `json:"location"`
+	Contents string          `json:"contents"`
+}
+
+// NewJSONFileContents converts the given location-to-contents map into a slice sorted by real path and then by
+// virtual path. The returned slice is never nil, even when the input is empty.
+func NewJSONFileContents(data map[source.Location]string) []JSONFileContents {
+	results := make([]JSONFileContents, 0, len(data))
+	for location, contents := range data {
+		results = append(results, JSONFileContents{
+			Location: location,
+			Contents: contents,
+		})
+	}
+
+	// sort by real path then virtual path to ensure the result is stable across multiple runs
+	sort.SliceStable(results, func(i, j int) bool {
+		if results[i].Location.RealPath == results[j].Location.RealPath {
+			return results[i].Location.VirtualPath < results[j].Location.VirtualPath
+		}
+		return results[i].Location.RealPath < results[j].Location.RealPath
+	})
+	return results
+}
diff --git a/syft/file/contents_cataloger.go b/syft/file/contents_cataloger.go
new file mode 100644
index 000000000..2a67b4c2e
--- /dev/null
+++ b/syft/file/contents_cataloger.go
@@ -0,0 +1,81 @@
+package file
+
+import (
+	"bytes"
+	"encoding/base64"
+	"fmt"
+	"io"
+
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/source"
+)
+
+// ContentsCataloger captures the base64-encoded contents of files that match a set of glob patterns.
+type ContentsCataloger struct {
+	globs              []string
+	skipFilesAboveSize int64
+}
+
+// NewContentsCataloger creates a cataloger for files matching any of the given globs. When skipFilesAboveSize is
+// greater than zero, files whose reported size exceeds it are left out of the results.
+func NewContentsCataloger(globs []string, skipFilesAboveSize int64) (*ContentsCataloger, error) {
+	return &ContentsCataloger{
+		globs:              globs,
+		skipFilesAboveSize: skipFilesAboveSize,
+	}, nil
+}
+
+// Catalog resolves every location matching the configured globs and returns a map from each location to the
+// base64 (standard encoding) form of its contents. Any resolver or read failure aborts the run with that error.
+func (i *ContentsCataloger) Catalog(resolver source.FileResolver) (map[source.Location]string, error) {
+	results := make(map[source.Location]string)
+
+	locations, err := resolver.FilesByGlob(i.globs...)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, location := range locations {
+		metadata, err := resolver.FileMetadataByLocation(location)
+		if err != nil {
+			return nil, err
+		}
+
+		// a non-positive limit disables the size filter
+		if i.skipFilesAboveSize > 0 && metadata.Size > i.skipFilesAboveSize {
+			continue
+		}
+
+		result, err := i.catalogLocation(resolver, location)
+		if err != nil {
+			return nil, err
+		}
+		results[location] = result
+	}
+	log.Debugf("file contents cataloger processed %d files", len(results))
+
+	return results, nil
+}
+
+// catalogLocation reads the file at the given location and returns its contents base64-encoded.
+func (i *ContentsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) (string, error) {
+	contentReader, err := resolver.FileContentsByLocation(location)
+	if err != nil {
+		return "", err
+	}
+	defer contentReader.Close()
+
+	buf := &bytes.Buffer{}
+	encoder := base64.NewEncoder(base64.StdEncoding, buf)
+	if _, err = io.Copy(encoder, contentReader); err != nil {
+		return "", fmt.Errorf("unable to observe contents of %+v: %+v", location.RealPath, err)
+	}
+
+	// the encoder buffers input that is not a multiple of 3 bytes; it must be closed to flush the final
+	// (padded) block, otherwise the tail of the encoded output is silently dropped
+	if err = encoder.Close(); err != nil {
+		return "", fmt.Errorf("unable to flush encoded contents of %+v: %+v", location.RealPath, err)
+	}
+
+	return buf.String(), nil
+}
diff --git a/syft/file/contents_cataloger_test.go b/syft/file/contents_cataloger_test.go
new file mode 100644
index 000000000..eb83d21aa
--- /dev/null
+++ b/syft/file/contents_cataloger_test.go
@@ -0,0 +1,92 @@
+package file
+
+import (
+	"testing"
+
+	"github.com/anchore/syft/syft/source"
+	"github.com/stretchr/testify/assert"
+)
+
+// TestContentsCataloger checks glob selection and size filtering of the contents cataloger for the listed paths.
+// Note: none of the expected values below end in base64 padding, so these cases only cover contents whose
+// length is a multiple of 3 bytes.
+func TestContentsCataloger(t *testing.T) {
+	allFiles := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}
+
+	tests := []struct {
+		name       string
+		globs      []string
+		maxSize    int64
+		files      []string
+		expected   map[source.Location]string
+		catalogErr bool
+	}{
+		{
+			name:  "multi-pattern",
+			globs: []string{"test-fixtures/last/*.txt", "test-fixtures/*.txt"},
+			files: allFiles,
+			expected: map[source.Location]string{
+				source.NewLocation("test-fixtures/last/path.txt"):    "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+			},
+		},
+		{
+			name:     "no-patterns",
+			globs:    []string{},
+			files:    []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"},
+			expected: map[source.Location]string{},
+		},
+		{
+			name:  "all-txt",
+			globs: []string{"**/*.txt"},
+			files: allFiles,
+			expected: map[source.Location]string{
+				source.NewLocation("test-fixtures/last/path.txt"):    "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+			},
+		},
+		{
+			name:  "subpath",
+			globs: []string{"test-fixtures/*.txt"},
+			files: allFiles,
+			expected: map[source.Location]string{
+				source.NewLocation("test-fixtures/another-path.txt"): "dGVzdC1maXh0dXJlcy9hbm90aGVyLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/a-path.txt"):       "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+			},
+		},
+		{
+			name:    "size-filter",
+			maxSize: 42,
+			globs:   []string{"**/*.txt"},
+			files:   allFiles,
+			expected: map[source.Location]string{
+				source.NewLocation("test-fixtures/last/path.txt"): "dGVzdC1maXh0dXJlcy9sYXN0L3BhdGgudHh0IGZpbGUgY29udGVudHMh",
+				source.NewLocation("test-fixtures/a-path.txt"):    "dGVzdC1maXh0dXJlcy9hLXBhdGgudHh0IGZpbGUgY29udGVudHMh",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			c, err := NewContentsCataloger(test.globs, test.maxSize)
+			if err != nil {
+				t.Fatalf("could not create cataloger: %+v", err)
+			}
+
+			resolver := source.NewMockResolverForPaths(test.files...)
+			actual, err := c.Catalog(resolver)
+			if err != nil && !test.catalogErr {
+				t.Fatalf("could not catalog (but should have been able to): %+v", err)
+			} else if err == nil && test.catalogErr {
+				t.Fatalf("expected catalog error but did not get one")
+			} else if test.catalogErr && err != nil {
+				return
+			}
+
+			assert.Equal(t, test.expected, actual, "mismatched contents")
+
+		})
+	}
+}
diff --git a/syft/source/mock_resolver.go b/syft/source/mock_resolver.go
index d97278722..9bd569475 100644
--- a/syft/source/mock_resolver.go
+++ b/syft/source/mock_resolver.go
@@ -5,7 +5,7 @@ import (
 	"io"
 	"os"
 
-	"github.com/anchore/syft/internal/file"
+	"github.com/bmatcuk/doublestar/v2"
 )
 
 var _ FileResolver = (*MockResolver)(nil)
@@ -84,7 +84,11 @@ func (r MockResolver) FilesByGlob(patterns ...string) ([]Location, error) {
 	var results []Location
 	for _, pattern := range patterns {
 		for _, location := range r.Locations {
-			if file.GlobMatch(pattern, location.RealPath) {
+			matches, err := doublestar.Match(pattern, location.RealPath)
+			if err != nil {
+				return nil, err
+			}
+			if matches {
 				results = append(results, location)
 			}
 		}
diff --git a/test/cli/power_user_cmd_test.go b/test/cli/power_user_cmd_test.go
index 3246c20c4..fb7c8cf48 100644
--- a/test/cli/power_user_cmd_test.go
+++ b/test/cli/power_user_cmd_test.go
@@ -51,6 +51,17 @@ func TestPowerUserCmdFlags(t *testing.T) {
 				assertSuccessfulReturnCode,
 			},
 		},
+		{
+			name: "content-cataloger-wired-up",
+			args: []string{"power-user", "docker-archive:" + getFixtureImage(t, "image-secrets")},
+			env: map[string]string{
+				"SYFT_CONTENTS_GLOBS": "/api-key.txt",
+			},
+			assertions: []traitAssertion{
+				assertInOutput(`"contents": "c29tZV9BcEkta0V5ID0gIjEyMzQ1QTdhOTAxYjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MCIK"`), // proof of the content cataloger
+				assertSuccessfulReturnCode,
+			},
+		},
 	}
 
 	for _, test := range tests {