diff --git a/syft/source/all_layers_resolver.go b/syft/source/all_layers_resolver.go index f2f248ee2..6437fb4bc 100644 --- a/syft/source/all_layers_resolver.go +++ b/syft/source/all_layers_resolver.go @@ -2,10 +2,8 @@ package source import ( "archive/tar" - "bytes" "fmt" "io" - "io/ioutil" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" @@ -13,16 +11,16 @@ import ( "github.com/anchore/syft/internal/log" ) -var _ Resolver = (*AllLayersResolver)(nil) +var _ FileResolver = (*allLayersResolver)(nil) -// AllLayersResolver implements path and content access for the AllLayers source option for container image data sources. -type AllLayersResolver struct { +// allLayersResolver implements path and content access for the AllLayers source option for container image data sources. +type allLayersResolver struct { img *image.Image layers []int } -// NewAllLayersResolver returns a new resolver from the perspective of all image layers for the given image. -func NewAllLayersResolver(img *image.Image) (*AllLayersResolver, error) { +// newAllLayersResolver returns a new resolver from the perspective of all image layers for the given image. +func newAllLayersResolver(img *image.Image) (*allLayersResolver, error) { if len(img.Layers) == 0 { return nil, fmt.Errorf("the image does not contain any layers") } @@ -31,14 +29,14 @@ func NewAllLayersResolver(img *image.Image) (*AllLayersResolver, error) { for idx := range img.Layers { layers = append(layers, idx) } - return &AllLayersResolver{ + return &allLayersResolver{ img: img, layers: layers, }, nil } // HasPath indicates if the given path exists in the underlying source. -func (r *AllLayersResolver) HasPath(path string) bool { +func (r *allLayersResolver) HasPath(path string) bool { p := file.Path(path) for _, layerIdx := range r.layers { tree := r.img.Layers[layerIdx].Tree @@ -49,7 +47,7 @@ func (r *AllLayersResolver) HasPath(path string) bool { return false } -func (r *AllLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.ReferenceSet, layerIdx int) ([]file.Reference, error) { +func (r *allLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.ReferenceSet, layerIdx int) ([]file.Reference, error) { uniqueFiles := make([]file.Reference, 0) // since there is potentially considerable work for each symlink/hardlink that needs to be resolved, let's check to see if this is a symlink/hardlink first @@ -80,7 +78,7 @@ func (r *AllLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.Ref } // FilesByPath returns all file.References that match the given paths from any layer in the image. -func (r *AllLayersResolver) FilesByPath(paths ...string) ([]Location, error) { +func (r *allLayersResolver) FilesByPath(paths ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) @@ -123,7 +121,7 @@ func (r *AllLayersResolver) FilesByPath(paths ...string) ([]Location, error) { // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. // nolint:gocognit -func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) { +func (r *allLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) @@ -164,7 +162,7 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. // This is helpful when attempting to find a file that is in the same layer or lower as another file. -func (r *AllLayersResolver) RelativeFileByPath(location Location, path string) *Location { +func (r *allLayersResolver) RelativeFileByPath(location Location, path string) *Location { entry, err := r.img.FileCatalog.Get(location.ref) if err != nil { return nil @@ -184,55 +182,26 @@ func (r *AllLayersResolver) RelativeFileByPath(location Location, path string) * return &relativeLocation } -// MultipleFileContentsByLocation returns the file contents for all file.References relative to the image. Note that a -// file.Reference is a path relative to a particular layer. -func (r *AllLayersResolver) MultipleFileContentsByLocation(locations []Location) (map[Location]io.ReadCloser, error) { - return mapLocationRefs(r.img.MultipleFileContentsByRef, locations) -} - // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer. // If the path does not exist an error is returned. -func (r *AllLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { +func (r *allLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { return r.img.FileContentsByRef(location.ref) } -type multiContentFetcher func(refs ...file.Reference) (map[file.Reference]io.ReadCloser, error) - -func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Location]io.ReadCloser, error) { - var fileRefs = make([]file.Reference, len(locations)) - var locationByRefs = make(map[file.Reference][]Location) - var results = make(map[Location]io.ReadCloser) - - for i, location := range locations { - locationByRefs[location.ref] = append(locationByRefs[location.ref], location) - fileRefs[i] = location.ref - } - - contentsByRef, err := callback(fileRefs...) - if err != nil { - return nil, err - } - - // TODO: this is not tested, we need a test case that covers a mapLocationRefs which has multiple Locations with the same reference in the request. The io.Reader should be copied. - for ref, content := range contentsByRef { - mappedLocations := locationByRefs[ref] - switch { - case len(mappedLocations) > 1: - // TODO: fixme... this can lead to lots of unexpected memory usage in unusual circumstances (cache is not leveraged for large files). - // stereoscope wont duplicate content requests if the caller asks for the same file multiple times... thats up to the caller - contentsBytes, err := ioutil.ReadAll(content) - if err != nil { - return nil, fmt.Errorf("unable to read ref=%+v :%w", ref, err) +func (r *allLayersResolver) AllLocations() <-chan Location { + results := make(chan Location) + go func() { + defer close(results) + for _, layerIdx := range r.layers { + tree := r.img.Layers[layerIdx].Tree + for _, ref := range tree.AllFiles() { + results <- NewLocationFromImage(string(ref.RealPath), ref, r.img) } - for _, loc := range mappedLocations { - results[loc] = ioutil.NopCloser(bytes.NewReader(contentsBytes)) - } - - case len(mappedLocations) == 1: - results[locationByRefs[ref][0]] = content - default: - return nil, fmt.Errorf("unexpected ref-location count=%d for ref=%v", len(mappedLocations), ref) } - } - return results, nil + }() + return results +} + +func (r *allLayersResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { + return fileMetadataByLocation(r.img, location) } diff --git a/syft/source/all_layers_resolver_test.go b/syft/source/all_layers_resolver_test.go index 54b50c38b..421a6663d 100644 --- a/syft/source/all_layers_resolver_test.go +++ b/syft/source/all_layers_resolver_test.go @@ -82,10 +82,9 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - defer cleanup() + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := NewAllLayersResolver(img) + resolver, err := newAllLayersResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } @@ -201,10 +200,9 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - defer cleanup() + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := NewAllLayersResolver(img) + resolver, err := newAllLayersResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } diff --git a/syft/source/content_requester.go b/syft/source/content_requester.go deleted file mode 100644 index ef99513c0..000000000 --- a/syft/source/content_requester.go +++ /dev/null @@ -1,56 +0,0 @@ -package source - -import "sync" - -// ContentRequester is an object tailored for taking source.Location objects which file contents will be resolved -// upon invoking Execute(). -type ContentRequester struct { - request map[Location][]*FileData - lock sync.Mutex -} - -// NewContentRequester creates a new ContentRequester object with the given initial request data. -func NewContentRequester(data ...*FileData) *ContentRequester { - requester := &ContentRequester{ - request: make(map[Location][]*FileData), - } - for _, d := range data { - requester.Add(d) - } - return requester -} - -// Add appends a new single FileData containing a source.Location to later have the contents fetched and stored within -// the given FileData object. -func (r *ContentRequester) Add(data *FileData) { - r.lock.Lock() - defer r.lock.Unlock() - - r.request[data.Location] = append(r.request[data.Location], data) -} - -// Execute takes the previously provided source.Location's and resolves the file contents, storing the results within -// the previously provided FileData objects. -func (r *ContentRequester) Execute(resolver ContentResolver) error { - r.lock.Lock() - defer r.lock.Unlock() - - var locations = make([]Location, len(r.request)) - idx := 0 - for l := range r.request { - locations[idx] = l - idx++ - } - - response, err := resolver.MultipleFileContentsByLocation(locations) - if err != nil { - return err - } - - for l, contents := range response { - for i := range r.request[l] { - r.request[l][i].Contents = contents - } - } - return nil -} diff --git a/syft/source/content_requester_test.go b/syft/source/content_requester_test.go deleted file mode 100644 index f46498e63..000000000 --- a/syft/source/content_requester_test.go +++ /dev/null @@ -1,74 +0,0 @@ -package source - -import ( - "io/ioutil" - "testing" - - "github.com/anchore/stereoscope/pkg/imagetest" - "github.com/sergi/go-diff/diffmatchpatch" -) - -func TestContentRequester(t *testing.T) { - tests := []struct { - fixture string - expectedContents map[string]string - }{ - { - fixture: "image-simple", - expectedContents: map[string]string{ - "/somefile-1.txt": "this file has contents", - "/somefile-2.txt": "file-2 contents!", - "/really/nested/file-3.txt": "another file!\nwith lines...", - }, - }, - } - - for _, test := range tests { - t.Run(test.fixture, func(t *testing.T) { - img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-simple") - defer cleanup() - - resolver, err := NewAllLayersResolver(img) - if err != nil { - t.Fatalf("could not create resolver: %+v", err) - } - - var data []*FileData - for path := range test.expectedContents { - - locations, err := resolver.FilesByPath(path) - if err != nil { - t.Fatalf("could not build request: %+v", err) - } - if len(locations) != 1 { - t.Fatalf("bad resolver paths: %+v", locations) - } - - data = append(data, &FileData{ - Location: locations[0], - }) - } - - if err := NewContentRequester(data...).Execute(resolver); err != nil { - t.Fatalf("could not execute request: %+v", err) - } - - for _, entry := range data { - if expected, ok := test.expectedContents[entry.Location.RealPath]; ok { - actualBytes, err := ioutil.ReadAll(entry.Contents) - if err != nil { - t.Fatalf("could not read %q: %+v", entry.Location.RealPath, err) - } - for expected != string(actualBytes) { - t.Errorf("mismatched contents for %q", entry.Location.RealPath) - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(expected, string(actualBytes), true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) - } - continue - } - t.Errorf("could not find %q", entry.Location.RealPath) - } - }) - } -} diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 7ef68bae8..9645cfee7 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -12,35 +12,39 @@ import ( "github.com/bmatcuk/doublestar/v2" ) -var _ Resolver = (*DirectoryResolver)(nil) +var _ FileResolver = (*directoryResolver)(nil) -// DirectoryResolver implements path and content access for the directory data source. -type DirectoryResolver struct { - Path string +// directoryResolver implements path and content access for the directory data source. +type directoryResolver struct { + path string } -func (r DirectoryResolver) requestPath(userPath string) string { +func newDirectoryResolver(path string) *directoryResolver { + return &directoryResolver{path: path} +} + +func (r directoryResolver) requestPath(userPath string) string { fullPath := userPath if filepath.IsAbs(fullPath) { // a path relative to root should be prefixed with the resolvers directory path, otherwise it should be left as is - fullPath = path.Join(r.Path, fullPath) + fullPath = path.Join(r.path, fullPath) } return fullPath } // HasPath indicates if the given path exists in the underlying source. -func (r *DirectoryResolver) HasPath(userPath string) bool { +func (r *directoryResolver) HasPath(userPath string) bool { _, err := os.Stat(r.requestPath(userPath)) return !os.IsNotExist(err) } // Stringer to represent a directory path data source -func (r DirectoryResolver) String() string { - return fmt.Sprintf("dir:%s", r.Path) +func (r directoryResolver) String() string { + return fmt.Sprintf("dir:%s", r.path) } // FilesByPath returns all file.References that match the given paths from the directory. -func (r DirectoryResolver) FilesByPath(userPaths ...string) ([]Location, error) { +func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) { var references = make([]Location, 0) for _, userPath := range userPaths { @@ -64,11 +68,11 @@ func (r DirectoryResolver) FilesByPath(userPaths ...string) ([]Location, error) } // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. -func (r DirectoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { +func (r directoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { result := make([]Location, 0) for _, pattern := range patterns { - pathPattern := path.Join(r.Path, pattern) + pathPattern := path.Join(r.path, pattern) pathMatches, err := doublestar.Glob(pathPattern) if err != nil { return nil, err @@ -93,8 +97,8 @@ func (r DirectoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. // This is helpful when attempting to find a file that is in the same layer or lower as another file. For the -// DirectoryResolver, this is a simple path lookup. -func (r *DirectoryResolver) RelativeFileByPath(_ Location, path string) *Location { +// directoryResolver, this is a simple path lookup. +func (r *directoryResolver) RelativeFileByPath(_ Location, path string) *Location { paths, err := r.FilesByPath(path) if err != nil { return nil @@ -106,17 +110,51 @@ func (r *DirectoryResolver) RelativeFileByPath(_ Location, path string) *Locatio return &paths[0] } -// MultipleFileContentsByLocation returns the file contents for all file.References relative a directory. -func (r DirectoryResolver) MultipleFileContentsByLocation(locations []Location) (map[Location]io.ReadCloser, error) { - refContents := make(map[Location]io.ReadCloser) - for _, location := range locations { - refContents[location] = file.NewDeferredReadCloser(location.RealPath) - } - return refContents, nil -} - // FileContentsByLocation fetches file contents for a single file reference relative to a directory. // If the path does not exist an error is returned. -func (r DirectoryResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { - return file.NewDeferredReadCloser(location.RealPath), nil +func (r directoryResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { + return file.NewLazyReadCloser(location.RealPath), nil +} + +func (r *directoryResolver) AllLocations() <-chan Location { + results := make(chan Location) + go func() { + defer close(results) + err := filepath.Walk(r.path, + func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + results <- NewLocation(path) + return nil + }) + if err != nil { + log.Errorf("unable to walk path=%q : %+v", r.path, err) + } + }() + return results +} + +func (r *directoryResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { + fi, err := os.Stat(location.RealPath) + if err != nil { + return FileMetadata{}, err + } + + // best effort + ty := UnknownFileType + switch { + case fi.Mode().IsDir(): + ty = Directory + case fi.Mode().IsRegular(): + ty = RegularFile + } + + return FileMetadata{ + Mode: fi.Mode(), + Type: ty, + // unsupported across platforms + UserID: -1, + GroupID: -1, + }, nil } diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index dc88876c7..db0ccd0c1 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -57,7 +57,7 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver := DirectoryResolver{c.root} + resolver := directoryResolver{c.root} hasPath := resolver.HasPath(c.input) if !c.forcePositiveHasPath { @@ -112,7 +112,7 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures"} + resolver := directoryResolver{"test-fixtures"} refs, err := resolver.FilesByPath(c.input...) if err != nil { @@ -126,59 +126,9 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { } } -func TestDirectoryResolver_MultipleFileContentsByRef(t *testing.T) { - cases := []struct { - name string - input []string - refCount int - contents []string - }{ - { - name: "gets multiple file contents", - input: []string{"test-fixtures/image-symlinks/file-1.txt", "test-fixtures/image-symlinks/file-2.txt"}, - refCount: 2, - }, - { - name: "skips non-existing files", - input: []string{"test-fixtures/image-symlinks/bogus.txt", "test-fixtures/image-symlinks/file-1.txt"}, - refCount: 1, - }, - { - name: "does not return anything for non-existing directories", - input: []string{"test-fixtures/non-existing/bogus.txt", "test-fixtures/non-existing/file-1.txt"}, - refCount: 0, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - locations := make([]Location, 0) - resolver := DirectoryResolver{"test-fixtures"} - - for _, p := range c.input { - newRefs, err := resolver.FilesByPath(p) - if err != nil { - t.Errorf("could not generate locations: %+v", err) - } - for _, ref := range newRefs { - locations = append(locations, ref) - } - } - - contents, err := resolver.MultipleFileContentsByLocation(locations) - if err != nil { - t.Fatalf("unable to generate file contents by ref: %+v", err) - } - if len(contents) != c.refCount { - t.Errorf("unexpected number of locations produced: %d != %d", len(contents), c.refCount) - } - - }) - } -} - func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { t.Run("finds multiple matching files", func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures"} + resolver := directoryResolver{"test-fixtures"} refs, err := resolver.FilesByGlob("image-symlinks/file*") if err != nil { @@ -195,7 +145,7 @@ func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { t.Run("finds multiple matching files", func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures/image-symlinks"} + resolver := directoryResolver{"test-fixtures/image-symlinks"} refs, err := resolver.FilesByGlob("**/*.txt") if err != nil { @@ -212,7 +162,7 @@ func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { t.Run("finds multiple matching files", func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures"} + resolver := directoryResolver{"test-fixtures"} refs, err := resolver.FilesByGlob("image-symlinks/*1.txt") if err != nil { t.Fatalf("could not use resolver: %+v, %+v", err, refs) diff --git a/syft/source/file_data.go b/syft/source/file_data.go deleted file mode 100644 index bd3d0c849..000000000 --- a/syft/source/file_data.go +++ /dev/null @@ -1,8 +0,0 @@ -package source - -import "io" - -type FileData struct { - Location Location - Contents io.ReadCloser -} diff --git a/syft/source/file_metadata.go b/syft/source/file_metadata.go new file mode 100644 index 000000000..45c6a877a --- /dev/null +++ b/syft/source/file_metadata.go @@ -0,0 +1,28 @@ +package source + +import ( + "os" + + "github.com/anchore/stereoscope/pkg/image" +) + +type FileMetadata struct { + Mode os.FileMode + Type FileType + UserID int + GroupID int +} + +func fileMetadataByLocation(img *image.Image, location Location) (FileMetadata, error) { + entry, err := img.FileCatalog.Get(location.ref) + if err != nil { + return FileMetadata{}, err + } + + return FileMetadata{ + Mode: entry.Metadata.Mode, + Type: newFileTypeFromTarHeaderTypeFlag(entry.Metadata.TypeFlag), + UserID: entry.Metadata.UserID, + GroupID: entry.Metadata.GroupID, + }, nil +} diff --git a/syft/source/file_resolver.go b/syft/source/file_resolver.go new file mode 100644 index 000000000..331820af2 --- /dev/null +++ b/syft/source/file_resolver.go @@ -0,0 +1,39 @@ +package source + +import ( + "io" +) + +// FileResolver is an interface that encompasses how to get specific file references and file contents for a generic data source. +type FileResolver interface { + FileContentResolver + FilePathResolver + FileLocationResolver + FileMetadataResolver +} + +// FileContentResolver knows how to get file content for a given Location +type FileContentResolver interface { + FileContentsByLocation(Location) (io.ReadCloser, error) +} + +type FileMetadataResolver interface { + FileMetadataByLocation(Location) (FileMetadata, error) +} + +// FilePathResolver knows how to get a Location for given string paths and globs +type FilePathResolver interface { + // HasPath indicates if the given path exists in the underlying source. + HasPath(string) bool + // FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches) + FilesByPath(paths ...string) ([]Location, error) + // FilesByGlob fetches a set of file references which the given glob matches + FilesByGlob(patterns ...string) ([]Location, error) + // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. + // This is helpful when attempting to find a file that is in the same layer or lower as another file. + RelativeFileByPath(_ Location, path string) *Location +} + +type FileLocationResolver interface { + AllLocations() <-chan Location +} diff --git a/syft/source/file_type.go b/syft/source/file_type.go new file mode 100644 index 000000000..3718b12c4 --- /dev/null +++ b/syft/source/file_type.go @@ -0,0 +1,34 @@ +package source + +const ( + UnknownFileType FileType = "unknownFileType" + RegularFile FileType = "regularFile" + HardLink FileType = "hardLink" + SymbolicLink FileType = "symbolicLink" + CharacterDevice FileType = "characterDevice" + BlockDevice FileType = "blockDevice" + Directory FileType = "directory" + FIFONode FileType = "fifoNode" +) + +type FileType string + +func newFileTypeFromTarHeaderTypeFlag(flag byte) FileType { + switch flag { + case '0', '\x00': + return RegularFile + case '1': + return HardLink + case '2': + return SymbolicLink + case '3': + return CharacterDevice + case '4': + return BlockDevice + case '5': + return Directory + case '6': + return FIFONode + } + return UnknownFileType +} diff --git a/syft/source/image_metadata.go b/syft/source/image_metadata.go index a00803caa..38a351ca3 100644 --- a/syft/source/image_metadata.go +++ b/syft/source/image_metadata.go @@ -3,7 +3,7 @@ package source import "github.com/anchore/stereoscope/pkg/image" // ImageMetadata represents all static metadata that defines what a container image is. This is useful to later describe -// "what" was cataloged without needing the more complicated stereoscope Image objects or Resolver objects. +// "what" was cataloged without needing the more complicated stereoscope Image objects or FileResolver objects. type ImageMetadata struct { UserInput string `json:"userInput"` ID string `json:"imageID"` @@ -11,7 +11,6 @@ type ImageMetadata struct { MediaType string `json:"mediaType"` Tags []string `json:"tags"` Size int64 `json:"imageSize"` - Scope Scope `json:"scope"` // specific perspective to catalog Layers []LayerMetadata `json:"layers"` RawManifest []byte `json:"manifest"` RawConfig []byte `json:"config"` @@ -25,7 +24,7 @@ type LayerMetadata struct { } // NewImageMetadata creates a new ImageMetadata object populated from the given stereoscope Image object and user configuration. -func NewImageMetadata(img *image.Image, userInput string, scope Scope) ImageMetadata { +func NewImageMetadata(img *image.Image, userInput string) ImageMetadata { // populate artifacts... tags := make([]string, len(img.Metadata.Tags)) for idx, tag := range img.Metadata.Tags { @@ -34,7 +33,6 @@ func NewImageMetadata(img *image.Image, userInput string, scope Scope) ImageMeta theImg := ImageMetadata{ ID: img.Metadata.ID, UserInput: userInput, - Scope: scope, ManifestDigest: img.Metadata.ManifestDigest, Size: img.Metadata.Size, MediaType: string(img.Metadata.MediaType), diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index 137d498b5..3500a80a2 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -9,28 +9,31 @@ import ( "github.com/anchore/stereoscope/pkg/image" ) -var _ Resolver = (*ImageSquashResolver)(nil) +var _ FileResolver = (*imageSquashResolver)(nil) -// ImageSquashResolver implements path and content access for the Squashed source option for container image data sources. -type ImageSquashResolver struct { +// imageSquashResolver implements path and content access for the Squashed source option for container image data sources. +type imageSquashResolver struct { img *image.Image } -// NewImageSquashResolver returns a new resolver from the perspective of the squashed representation for the given image. -func NewImageSquashResolver(img *image.Image) (*ImageSquashResolver, error) { +// newImageSquashResolver returns a new resolver from the perspective of the squashed representation for the given image. +func newImageSquashResolver(img *image.Image) (*imageSquashResolver, error) { if img.SquashedTree() == nil { return nil, fmt.Errorf("the image does not have have a squashed tree") } - return &ImageSquashResolver{img: img}, nil + + return &imageSquashResolver{ + img: img, + }, nil } // HasPath indicates if the given path exists in the underlying source. -func (r *ImageSquashResolver) HasPath(path string) bool { +func (r *imageSquashResolver) HasPath(path string) bool { return r.img.SquashedTree().HasPath(file.Path(path)) } // FilesByPath returns all file.References that match the given paths within the squashed representation of the image. -func (r *ImageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { +func (r *imageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) @@ -74,7 +77,7 @@ func (r *ImageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { } // FilesByGlob returns all file.References that match the given path glob pattern within the squashed representation of the image. -func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error) { +func (r *imageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) @@ -88,7 +91,9 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error // don't consider directories (special case: there is no path information for /) if result.MatchPath == "/" { continue - } else if r.img.FileCatalog.Exists(result.Reference) { + } + + if r.img.FileCatalog.Exists(result.Reference) { metadata, err := r.img.FileCatalog.Get(result.Reference) if err != nil { return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.MatchPath, err) @@ -116,8 +121,8 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. // This is helpful when attempting to find a file that is in the same layer or lower as another file. For the -// ImageSquashResolver, this is a simple path lookup. -func (r *ImageSquashResolver) RelativeFileByPath(_ Location, path string) *Location { +// imageSquashResolver, this is a simple path lookup. +func (r *imageSquashResolver) RelativeFileByPath(_ Location, path string) *Location { paths, err := r.FilesByPath(path) if err != nil { return nil @@ -129,14 +134,23 @@ func (r *ImageSquashResolver) RelativeFileByPath(_ Location, path string) *Locat return &paths[0] } -// MultipleFileContentsByLocation returns the file contents for all file.References relative to the image. Note that a -// file.Reference is a path relative to a particular layer, in this case only from the squashed representation. -func (r *ImageSquashResolver) MultipleFileContentsByLocation(locations []Location) (map[Location]io.ReadCloser, error) { - return mapLocationRefs(r.img.MultipleFileContentsByRef, locations) -} - // FileContentsByLocation fetches file contents for a single file reference, irregardless of the source layer. // If the path does not exist an error is returned. -func (r *ImageSquashResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { +func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) { return r.img.FileContentsByRef(location.ref) } + +func (r *imageSquashResolver) AllLocations() <-chan Location { + results := make(chan Location) + go func() { + defer close(results) + for _, ref := range r.img.SquashedTree().AllFiles() { + results <- NewLocationFromImage(string(ref.RealPath), ref, r.img) + } + }() + return results +} + +func (r *imageSquashResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { + return fileMetadataByLocation(r.img, location) +} diff --git a/syft/source/image_squash_resolver_test.go b/syft/source/image_squash_resolver_test.go index a866f0a91..4d8c1556f 100644 --- a/syft/source/image_squash_resolver_test.go +++ b/syft/source/image_squash_resolver_test.go @@ -62,10 +62,9 @@ func TestImageSquashResolver_FilesByPath(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - defer cleanup() + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := NewImageSquashResolver(img) + resolver, err := newImageSquashResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } @@ -179,10 +178,9 @@ func TestImageSquashResolver_FilesByGlob(t *testing.T) { } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - defer cleanup() + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") - resolver, err := NewImageSquashResolver(img) + resolver, err := newImageSquashResolver(img) if err != nil { t.Fatalf("could not create resolver: %+v", err) } diff --git a/syft/source/location.go b/syft/source/location.go index 50083166a..532b01623 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -45,6 +45,14 @@ func NewLocationFromImage(virtualPath string, ref file.Reference, img *image.Ima } } +func NewLocationFromReference(ref file.Reference) Location { + return Location{ + VirtualPath: string(ref.RealPath), + RealPath: string(ref.RealPath), + ref: ref, + } +} + func (l Location) String() string { str := "" if l.ref.ID() != 0 { diff --git a/syft/source/mock_resolver.go b/syft/source/mock_resolver.go index ba815f3c3..31d129449 100644 --- a/syft/source/mock_resolver.go +++ b/syft/source/mock_resolver.go @@ -8,9 +8,9 @@ import ( "github.com/anchore/syft/internal/file" ) -var _ Resolver = (*MockResolver)(nil) +var _ FileResolver = (*MockResolver)(nil) -// MockResolver implements the Resolver interface and is intended for use *only in test code*. +// MockResolver implements the FileResolver interface and is intended for use *only in test code*. // It provides an implementation that can resolve local filesystem paths using only a provided discrete list of file // paths, which are typically paths to test fixtures. type MockResolver struct { @@ -55,20 +55,6 @@ func (r MockResolver) FileContentsByLocation(location Location) (io.ReadCloser, return nil, fmt.Errorf("no file for location: %v", location) } -// MultipleFileContentsByLocation returns the file contents for all specified Locations. -func (r MockResolver) MultipleFileContentsByLocation(locations []Location) (map[Location]io.ReadCloser, error) { - results := make(map[Location]io.ReadCloser) - for _, l := range locations { - contents, err := r.FileContentsByLocation(l) - if err != nil { - return nil, err - } - results[l] = contents - } - - return results, nil -} - // FilesByPath returns all Locations that match the given paths. func (r MockResolver) FilesByPath(paths ...string) ([]Location, error) { var results []Location @@ -110,3 +96,11 @@ func (r MockResolver) RelativeFileByPath(_ Location, path string) *Location { return &paths[0] } + +func (r MockResolver) AllLocations() <-chan Location { + panic("not implemented") +} + +func (r MockResolver) FileMetadataByLocation(Location) (FileMetadata, error) { + panic("not implemented") +} diff --git a/syft/source/resolver.go b/syft/source/resolver.go deleted file mode 100644 index 019536aca..000000000 --- a/syft/source/resolver.go +++ /dev/null @@ -1,46 +0,0 @@ -package source - -import ( - "fmt" - "io" - - "github.com/anchore/stereoscope/pkg/image" -) - -// Resolver is an interface that encompasses how to get specific file references and file contents for a generic data source. -type Resolver interface { - ContentResolver - FileResolver -} - -// ContentResolver knows how to get file content for given file.References -type ContentResolver interface { - FileContentsByLocation(Location) (io.ReadCloser, error) - // TODO: it is possible to be given duplicate locations that will be overridden in the map (key), a subtle problem that coule easily be misued. - MultipleFileContentsByLocation([]Location) (map[Location]io.ReadCloser, error) -} - -// FileResolver knows how to get a Location for given string paths and globs -type FileResolver interface { - // HasPath indicates if the given path exists in the underlying source. - HasPath(path string) bool - // FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches) - FilesByPath(paths ...string) ([]Location, error) - // FilesByGlob fetches a set of file references which the given glob matches - FilesByGlob(patterns ...string) ([]Location, error) - // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. - // This is helpful when attempting to find a file that is in the same layer or lower as another file. - RelativeFileByPath(_ Location, path string) *Location -} - -// getImageResolver returns the appropriate resolve for a container image given the source option -func getImageResolver(img *image.Image, scope Scope) (Resolver, error) { - switch scope { - case SquashedScope: - return NewImageSquashResolver(img) - case AllLayersScope: - return NewAllLayersResolver(img) - default: - return nil, fmt.Errorf("bad scope provided: %+v", scope) - } -} diff --git a/syft/source/source.go b/syft/source/source.go index 182a87f71..b1e081db6 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -18,7 +18,6 @@ import ( // Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used // in cataloging (based on the data source and configuration) type Source struct { - Resolver Resolver // a Resolver object to use in file path/glob resolution and file contents resolution Image *image.Image // the image object to be cataloged (image only) Metadata Metadata } @@ -26,7 +25,7 @@ type Source struct { type sourceDetector func(string) (image.Source, string, error) // New produces a Source based on userInput like dir: or image:tag -func New(userInput string, o Scope) (Source, func(), error) { +func New(userInput string) (Source, func(), error) { fs := afero.NewOsFs() parsedScheme, location, err := detectScheme(fs, image.DetectSource, userInput) if err != nil { @@ -60,7 +59,7 @@ func New(userInput string, o Scope) (Source, func(), error) { return Source{}, cleanup, fmt.Errorf("could not fetch image '%s': %w", location, err) } - s, err := NewFromImage(img, o, location) + s, err := NewFromImage(img, location) if err != nil { return Source{}, cleanup, fmt.Errorf("could not populate source with image: %w", err) } @@ -73,9 +72,6 @@ func New(userInput string, o Scope) (Source, func(), error) { // NewFromDirectory creates a new source object tailored to catalog a given filesystem directory recursively. func NewFromDirectory(path string) (Source, error) { return Source{ - Resolver: &DirectoryResolver{ - Path: path, - }, Metadata: Metadata{ Scheme: DirectoryScheme, Path: path, @@ -85,22 +81,33 @@ func NewFromDirectory(path string) (Source, error) { // NewFromImage creates a new source object tailored to catalog a given container image, relative to the // option given (e.g. all-layers, squashed, etc) -func NewFromImage(img *image.Image, scope Scope, userImageStr string) (Source, error) { +func NewFromImage(img *image.Image, userImageStr string) (Source, error) { if img == nil { return Source{}, fmt.Errorf("no image given") } - resolver, err := getImageResolver(img, scope) - if err != nil { - return Source{}, fmt.Errorf("could not determine file resolver: %w", err) - } - return Source{ - Resolver: resolver, - Image: img, + Image: img, Metadata: Metadata{ Scheme: ImageScheme, - ImageMetadata: NewImageMetadata(img, userImageStr, scope), + ImageMetadata: NewImageMetadata(img, userImageStr), }, }, nil } + +func (s Source) FileResolver(scope Scope) (FileResolver, error) { + switch s.Metadata.Scheme { + case DirectoryScheme: + return newDirectoryResolver(s.Metadata.Path), nil + case ImageScheme: + switch scope { + case SquashedScope: + return newImageSquashResolver(s.Image) + case AllLayersScope: + return newAllLayersResolver(s.Image) + default: + return nil, fmt.Errorf("bad image scope provided: %+v", scope) + } + } + return nil, fmt.Errorf("unable to determine FilePathResolver with current scheme=%q", s.Metadata.Scheme) +} diff --git a/syft/source/source_test.go b/syft/source/source_test.go index 35117a3f1..72a97653b 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -1,7 +1,6 @@ package source import ( - "io/ioutil" "os" "testing" @@ -12,18 +11,7 @@ import ( func TestNewFromImageFails(t *testing.T) { t.Run("no image given", func(t *testing.T) { - _, err := NewFromImage(nil, AllLayersScope, "") - if err == nil { - t.Errorf("expected an error condition but none was given") - } - }) -} - -func TestNewFromImageUnknownOption(t *testing.T) { - img := image.Image{} - - t.Run("unknown option is an error", func(t *testing.T) { - _, err := NewFromImage(&img, UnknownScope, "") + _, err := NewFromImage(nil, "") if err == nil { t.Errorf("expected an error condition but none was given") } @@ -37,7 +25,7 @@ func TestNewFromImage(t *testing.T) { } t.Run("create a new source object from image", func(t *testing.T) { - _, err := NewFromImage(&img, AllLayersScope, "") + _, err := NewFromImage(&img, "") if err != nil { t.Errorf("unexpected error when creating a new Locations from img: %+v", err) } @@ -87,8 +75,11 @@ func TestNewFromDirectory(t *testing.T) { if src.Metadata.Path != test.input { t.Errorf("mismatched stringer: '%s' != '%s'", src.Metadata.Path, test.input) } - - refs, err := src.Resolver.FilesByPath(test.inputPaths...) + resolver, err := src.FileResolver(SquashedScope) + if err != nil { + t.Errorf("could not get resolver error: %+v", err) + } + refs, err := resolver.FilesByPath(test.inputPaths...) if err != nil { t.Errorf("FilesByPath call produced an error: %+v", err) } @@ -101,58 +92,6 @@ func TestNewFromDirectory(t *testing.T) { } } -func TestMultipleFileContentsByLocation(t *testing.T) { - testCases := []struct { - desc string - input string - path string - expected string - }{ - { - input: "test-fixtures/path-detected", - desc: "empty file", - path: "test-fixtures/path-detected/empty", - expected: "", - }, - { - input: "test-fixtures/path-detected", - desc: "file has contents", - path: "test-fixtures/path-detected/.vimrc", - expected: "\" A .vimrc file\n", - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - p, err := NewFromDirectory(test.input) - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - locations, err := p.Resolver.FilesByPath(test.path) - if err != nil { - t.Errorf("could not get file references from path: %s, %v", test.path, err) - } - - if len(locations) != 1 { - t.Fatalf("expected a single location to be generated but got: %d", len(locations)) - } - location := locations[0] - - contents, err := p.Resolver.MultipleFileContentsByLocation([]Location{location}) - contentReader := contents[location] - - content, err := ioutil.ReadAll(contentReader) - if err != nil { - t.Fatalf("cannot read contents: %+v", err) - } - - if string(content) != test.expected { - t.Errorf("unexpected contents from file: '%s' != '%s'", content, test.expected) - } - - }) - } -} - func TestFilesByPathDoesNotExist(t *testing.T) { testCases := []struct { desc string @@ -168,11 +107,15 @@ func TestFilesByPathDoesNotExist(t *testing.T) { } for _, test := range testCases { t.Run(test.desc, func(t *testing.T) { - p, err := NewFromDirectory(test.input) + src, err := NewFromDirectory(test.input) if err != nil { t.Errorf("could not create NewDirScope: %+v", err) } - refs, err := p.Resolver.FilesByPath(test.path) + resolver, err := src.FileResolver(SquashedScope) + if err != nil { + t.Errorf("could not get resolver error: %+v", err) + } + refs, err := resolver.FilesByPath(test.path) if err != nil { t.Errorf("could not get file references from path: %s, %v", test.path, err) } @@ -213,12 +156,15 @@ func TestFilesByGlob(t *testing.T) { } for _, test := range testCases { t.Run(test.desc, func(t *testing.T) { - p, err := NewFromDirectory(test.input) + src, err := NewFromDirectory(test.input) if err != nil { t.Errorf("could not create NewDirScope: %+v", err) } - - contents, err := p.Resolver.FilesByGlob(test.glob) + resolver, err := src.FileResolver(SquashedScope) + if err != nil { + t.Errorf("could not get resolver error: %+v", err) + } + contents, err := resolver.FilesByGlob(test.glob) if len(contents) != test.expected { t.Errorf("unexpected number of files found by glob (%s): %d != %d", test.glob, len(contents), test.expected)