mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
Use file indexer directly when scanning with file source (#3333)
* Use file indexer when scanning with file source Prevents filesystem walks when scanning a single file, to optimise memory & scan times in case the scanned file lives in a directory containing many files. Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk> * Create filetree resolver Shared behaviour for resolving indexed filetrees. Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk> --------- Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk>
This commit is contained in:
parent
8abd97a5bf
commit
21df38798e
@ -1,17 +1,11 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
|
||||
"github.com/anchore/stereoscope/pkg/filetree"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/internal/windows"
|
||||
)
|
||||
|
||||
var ErrSkipPath = errors.New("skip path")
|
||||
@ -20,11 +14,8 @@ var _ file.Resolver = (*Directory)(nil)
|
||||
|
||||
// Directory implements path and content access for the directory data source.
|
||||
type Directory struct {
|
||||
filetreeResolver
|
||||
path string
|
||||
chroot ChrootContext
|
||||
tree filetree.Reader
|
||||
index filetree.IndexReader
|
||||
searchContext filetree.Searcher
|
||||
indexer *directoryIndexer
|
||||
}
|
||||
|
||||
@ -48,9 +39,11 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI
|
||||
|
||||
return &Directory{
|
||||
path: cleanRoot,
|
||||
filetreeResolver: filetreeResolver{
|
||||
chroot: *chroot,
|
||||
tree: filetree.New(),
|
||||
index: filetree.NewIndex(),
|
||||
},
|
||||
indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...),
|
||||
}, nil
|
||||
}
|
||||
@ -66,220 +59,12 @@ func (r *Directory) buildIndex() error {
|
||||
|
||||
r.tree = tree
|
||||
r.index = index
|
||||
r.searchContext = filetree.NewSearchContext(tree, index)
|
||||
r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r Directory) requestPath(userPath string) (string, error) {
|
||||
return r.chroot.ToNativePath(userPath)
|
||||
}
|
||||
|
||||
// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver.
|
||||
func (r Directory) responsePath(path string) string {
|
||||
return r.chroot.ToChrootPath(path)
|
||||
}
|
||||
|
||||
// HasPath indicates if the given path exists in the underlying source.
|
||||
func (r *Directory) HasPath(userPath string) bool {
|
||||
requestPath, err := r.requestPath(userPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return r.tree.HasPath(stereoscopeFile.Path(requestPath))
|
||||
}
|
||||
|
||||
// Stringer to represent a directory path data source
|
||||
func (r Directory) String() string {
|
||||
return fmt.Sprintf("dir:%s", r.path)
|
||||
}
|
||||
|
||||
// FilesByPath returns all file.References that match the given paths from the directory.
|
||||
func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) {
|
||||
var references = make([]file.Location, 0)
|
||||
|
||||
for _, userPath := range userPaths {
|
||||
userStrPath, err := r.requestPath(userPath)
|
||||
if err != nil {
|
||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// we should be resolving symlinks and preserving this information as a AccessPath to the real file
|
||||
ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
|
||||
if err != nil {
|
||||
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if !ref.HasReference() {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, err := r.index.Get(*ref.Reference)
|
||||
if err != nil {
|
||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Metadata.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
if windows.HostRunningOnWindows() {
|
||||
userStrPath = windows.ToPosix(userStrPath)
|
||||
}
|
||||
|
||||
if ref.HasReference() {
|
||||
references = append(references,
|
||||
file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
|
||||
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
|
||||
*ref.Reference,
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return references, nil
|
||||
}
|
||||
|
||||
func (r Directory) requestGlob(pattern string) (string, error) {
|
||||
return r.chroot.ToNativeGlob(pattern)
|
||||
}
|
||||
|
||||
// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
|
||||
func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||
uniqueLocations := make([]file.Location, 0)
|
||||
|
||||
for _, pattern := range patterns {
|
||||
requestGlob, err := r.requestGlob(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, refVia := range refVias {
|
||||
if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
|
||||
continue
|
||||
}
|
||||
entry, err := r.index.Get(*refVia.Reference)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Metadata.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
loc := file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
|
||||
r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root
|
||||
*refVia.Reference,
|
||||
)
|
||||
uniqueFileIDs.Add(*refVia.Reference)
|
||||
uniqueLocations = append(uniqueLocations, loc)
|
||||
}
|
||||
}
|
||||
|
||||
return uniqueLocations, nil
|
||||
}
|
||||
|
||||
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
|
||||
// This is helpful when attempting to find a file that is in the same layer or lower as another file. For the
|
||||
// Directory, this is a simple path lookup.
|
||||
func (r *Directory) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||
paths, err := r.FilesByPath(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &paths[0]
|
||||
}
|
||||
|
||||
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
|
||||
// If the path does not exist an error is returned.
|
||||
func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||
if location.RealPath == "" {
|
||||
return nil, errors.New("empty path given")
|
||||
}
|
||||
|
||||
entry, err := r.index.Get(location.Reference())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Type == stereoscopeFile.TypeDirectory {
|
||||
return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
|
||||
}
|
||||
|
||||
// RealPath is posix so for windows directory resolver we need to translate
|
||||
// to its true on disk path.
|
||||
filePath := string(location.Reference().RealPath)
|
||||
if windows.HostRunningOnWindows() {
|
||||
filePath = windows.FromPosix(filePath)
|
||||
}
|
||||
|
||||
return stereoscopeFile.NewLazyReadCloser(filePath), nil
|
||||
}
|
||||
|
||||
func (r *Directory) AllLocations(ctx context.Context) <-chan file.Location {
|
||||
results := make(chan file.Location)
|
||||
go func() {
|
||||
defer close(results)
|
||||
for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref):
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
return results
|
||||
}
|
||||
|
||||
func (r *Directory) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||
entry, err := r.index.Get(location.Reference())
|
||||
if err != nil {
|
||||
return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
|
||||
}
|
||||
|
||||
return entry.Metadata, nil
|
||||
}
|
||||
|
||||
func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||
uniqueLocations := make([]file.Location, 0)
|
||||
|
||||
refVias, err := r.searchContext.SearchByMIMEType(types...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, refVia := range refVias {
|
||||
if !refVia.HasReference() {
|
||||
continue
|
||||
}
|
||||
if uniqueFileIDs.Contains(*refVia.Reference) {
|
||||
continue
|
||||
}
|
||||
location := file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(refVia.Reference.RealPath)),
|
||||
r.responsePath(string(refVia.RequestPath)),
|
||||
*refVia.Reference,
|
||||
)
|
||||
uniqueFileIDs.Add(*refVia.Reference)
|
||||
uniqueLocations = append(uniqueLocations, location)
|
||||
}
|
||||
|
||||
return uniqueLocations, nil
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
62
syft/internal/fileresolver/file.go
Normal file
62
syft/internal/fileresolver/file.go
Normal file
@ -0,0 +1,62 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/filetree"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
)
|
||||
|
||||
// Compile time assurance that we meet the Resolver interface.
|
||||
var _ file.Resolver = (*File)(nil)
|
||||
|
||||
// File implements path and content access for the file data source.
|
||||
type File struct {
|
||||
filetreeResolver
|
||||
path string
|
||||
indexer *fileIndexer
|
||||
}
|
||||
|
||||
// parent should be the symlink free absolute path to the parent directory
|
||||
// path is the filepath of the file we're creating content access for
|
||||
func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, error) {
|
||||
chroot, err := NewChrootContextFromCWD(parent, parent)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to interpret chroot context: %w", err)
|
||||
}
|
||||
|
||||
cleanBase := chroot.Base()
|
||||
|
||||
file := &File{
|
||||
path: path,
|
||||
filetreeResolver: filetreeResolver{
|
||||
chroot: *chroot,
|
||||
tree: filetree.New(),
|
||||
index: filetree.NewIndex(),
|
||||
},
|
||||
indexer: newFileIndexer(path, cleanBase, pathFilters...),
|
||||
}
|
||||
|
||||
return file, file.buildIndex()
|
||||
}
|
||||
|
||||
func (r *File) buildIndex() error {
|
||||
if r.indexer == nil {
|
||||
return fmt.Errorf("no file indexer configured")
|
||||
}
|
||||
tree, index, err := r.indexer.build()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r.tree = tree
|
||||
r.index = index
|
||||
r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stringer to represent a file path data source
|
||||
func (r File) String() string {
|
||||
return fmt.Sprintf("file:%s", r.path)
|
||||
}
|
||||
223
syft/internal/fileresolver/file_indexer.go
Normal file
223
syft/internal/fileresolver/file_indexer.go
Normal file
@ -0,0 +1,223 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/file"
|
||||
"github.com/anchore/stereoscope/pkg/filetree"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/internal/windows"
|
||||
"github.com/wagoodman/go-progress"
|
||||
)
|
||||
|
||||
type fileIndexer struct {
|
||||
path string
|
||||
base string
|
||||
pathIndexVisitors []PathIndexVisitor
|
||||
errPaths map[string]error
|
||||
tree filetree.ReadWriter
|
||||
index filetree.Index
|
||||
}
|
||||
|
||||
func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer {
|
||||
i := &fileIndexer{
|
||||
path: path,
|
||||
base: base,
|
||||
tree: filetree.New(),
|
||||
index: filetree.NewIndex(),
|
||||
pathIndexVisitors: append(
|
||||
[]PathIndexVisitor{
|
||||
requireFileInfo,
|
||||
disallowByFileType,
|
||||
skipPathsByMountTypeAndName(path),
|
||||
},
|
||||
visitors...,
|
||||
),
|
||||
errPaths: make(map[string]error),
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
||||
|
||||
// Build the indexer
|
||||
func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
|
||||
return r.tree, r.index, index(r.path, r.indexPath)
|
||||
}
|
||||
|
||||
// Index file at the given path
|
||||
// A file indexer simply indexes the file and its directory.
|
||||
func index(path string, indexer func(string, *progress.Stage) error) error {
|
||||
// We want to index the file at the provided path and its parent directory.
|
||||
// We need to probably check that we have file access
|
||||
// We also need to determine what to do when the file itself is a symlink.
|
||||
stager, prog := indexingProgress(path)
|
||||
defer prog.SetCompleted()
|
||||
|
||||
err := indexer(path, stager)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to index filesystem path=%q: %w", path, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// indexPath will index the file at the provided path as well as its parent directory.
|
||||
// It expects path to be a file, not a directory.
|
||||
// If a directory is provided then an error will be returned. Additionally, any IO or
|
||||
// permissions errors on the file at path or its parent directory will return an error.
|
||||
// Filter functions provided to the indexer are honoured, so if the path provided (or its parent
|
||||
// directory) is filtered by a filter function, an error is returned.
|
||||
func (r *fileIndexer) indexPath(path string, stager *progress.Stage) error {
|
||||
log.WithFields("path", path).Trace("indexing file path")
|
||||
|
||||
absPath, err := filepath.Abs(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Protect against callers trying to call file_indexer with directories
|
||||
fi, err := os.Stat(absPath)
|
||||
// The directory indexer ignores stat errors, however this file indexer won't ignore them
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to stat path=%q: %w", path, err)
|
||||
}
|
||||
if fi.IsDir() {
|
||||
return fmt.Errorf("unable to index file, given path was a directory=%q", path)
|
||||
}
|
||||
|
||||
absSymlinkFreeFilePath, err := absoluteSymlinkFreePathToFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Now index the file and its parent directory
|
||||
// We try to index the parent directory first, because if the parent directory
|
||||
// is ignored by any filter function, then we must ensure we also ignore the file.
|
||||
absSymlinkFreeParent, err := absoluteSymlinkFreePathToParent(absSymlinkFreeFilePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentFi, err := os.Stat(absSymlinkFreeParent)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to stat parent of file=%q: %w", absSymlinkFreeParent, err)
|
||||
}
|
||||
|
||||
stager.Current = absSymlinkFreeParent
|
||||
indexParentErr := r.filterAndIndex(absSymlinkFreeParent, parentFi)
|
||||
if indexParentErr != nil {
|
||||
return indexParentErr
|
||||
}
|
||||
|
||||
// We have indexed the parent successfully, now attempt to index the file.
|
||||
stager.Current = absSymlinkFreeFilePath
|
||||
indexFileErr := r.filterAndIndex(absSymlinkFreeFilePath, fi)
|
||||
if indexFileErr != nil {
|
||||
return indexFileErr
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error {
|
||||
// check if any of the filters want us to ignore this path
|
||||
for _, filterFn := range r.pathIndexVisitors {
|
||||
if filterFn == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if filterErr := filterFn(r.base, path, info, nil); filterErr != nil {
|
||||
// A filter function wants us to ignore this path, honour it
|
||||
return filterErr
|
||||
}
|
||||
}
|
||||
|
||||
// here we check to see if we need to normalize paths to posix on the way in coming from windows
|
||||
if windows.HostRunningOnWindows() {
|
||||
path = windows.ToPosix(path)
|
||||
}
|
||||
|
||||
err := r.addPathToIndex(path, info)
|
||||
// If we hit file access errors, isFileAccessErr will handle logging & adding
|
||||
// the path to the errPaths map.
|
||||
// While the directory_indexer does not let these cause the indexer to throw
|
||||
// we will here, as not having access to the file we index for a file source
|
||||
// probably makes the file source creation useless? I need to check with Syft maintainers.
|
||||
// This also poses the question, is errPaths worthwhile for file_indexer?
|
||||
if r.isFileAccessErr(path, err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Add path to index. File indexer doesn't need to support symlink, as we should have abs symlink free path.
|
||||
// If we somehow get a symlink here, report as an error.
|
||||
func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error {
|
||||
switch t := file.TypeFromMode(info.Mode()); t {
|
||||
case file.TypeDirectory:
|
||||
return r.addDirectoryToIndex(path, info)
|
||||
case file.TypeRegular:
|
||||
return r.addFileToIndex(path, info)
|
||||
default:
|
||||
return fmt.Errorf("unsupported file type: %s", t)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error {
|
||||
ref, err := r.tree.AddDir(file.Path(path))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
metadata := file.NewMetadataFromPath(path, info)
|
||||
r.index.Add(*ref, metadata)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error {
|
||||
ref, err := r.tree.AddFile(file.Path(path))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
metadata := file.NewMetadataFromPath(path, info)
|
||||
r.index.Add(*ref, metadata)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get absolute symlink free path to parent of the file
|
||||
func absoluteSymlinkFreePathToParent(path string) (string, error) {
|
||||
absFilePath, err := absoluteSymlinkFreePathToFile(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Dir(absFilePath), nil
|
||||
}
|
||||
|
||||
// absoluteSymlinkFreePathToFile returns the absolute form of path with all symlinks resolved.
//
// The path must exist: filepath.EvalSymlinks fails for a missing path, and that failure is
// returned wrapped (so errors.Is(err, fs.ErrNotExist) holds for a missing file). The two
// failure modes carry distinct messages so the failing step can be told apart.
func absoluteSymlinkFreePathToFile(path string) (string, error) {
	absAnalysisPath, err := filepath.Abs(path)
	if err != nil {
		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
	}

	dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
	if err != nil {
		return "", fmt.Errorf("unable to evaluate symlinks for analysis path=%q: %w", path, err)
	}

	return dereferencedAbsAnalysisPath, nil
}
|
||||
|
||||
func (r *fileIndexer) isFileAccessErr(path string, err error) bool {
|
||||
// don't allow for errors to stop indexing, keep track of the paths and continue.
|
||||
if err != nil {
|
||||
log.Warnf("unable to access path=%q: %+v", path, err)
|
||||
r.errPaths[path] = err
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
103
syft/internal/fileresolver/file_indexer_test.go
Normal file
103
syft/internal/fileresolver/file_indexer_test.go
Normal file
@ -0,0 +1,103 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"github.com/anchore/stereoscope/pkg/file"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// - Verify that both the parent and the path are indexed
|
||||
func Test_index(t *testing.T) {
|
||||
testPath := "test-fixtures/system_paths/target/home/place"
|
||||
indexer := newFileIndexer(testPath, "", make([]PathIndexVisitor, 0)...)
|
||||
tree, index, err := indexer.build()
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
path string
|
||||
}{
|
||||
{
|
||||
name: "has path",
|
||||
path: "test-fixtures/system_paths/target/home/place",
|
||||
},
|
||||
{
|
||||
name: "has parent dir",
|
||||
path: "test-fixtures/system_paths/target/home",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
info, err := os.Stat(test.path)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// note: the index uses absolute paths, so assertions MUST keep this in mind
|
||||
cwd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
p := file.Path(path.Join(cwd, test.path))
|
||||
assert.Equal(t, true, tree.HasPath(p))
|
||||
exists, ref, err := tree.File(p)
|
||||
assert.Equal(t, true, exists)
|
||||
if assert.NoError(t, err) {
|
||||
return
|
||||
}
|
||||
|
||||
entry, err := index.Get(*ref.Reference)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, info.Mode(), entry.Mode)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// - Verify that directories are rejected
|
||||
func Test_indexRejectsDirectory(t *testing.T) {
|
||||
dirPath := "test-fixtures/system_paths/target/home"
|
||||
indexer := newFileIndexer(dirPath, "", make([]PathIndexVisitor, 0)...)
|
||||
_, _, err := indexer.build()
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
// - Verify ignores if filterAndIndex sets up a filter for the filepath
|
||||
func Test_ignoresPathIfFiltered(t *testing.T) {
|
||||
testPath := "test-fixtures/system_paths/target/home/place"
|
||||
cwd, cwdErr := os.Getwd()
|
||||
require.NoError(t, cwdErr)
|
||||
ignorePath := path.Join(cwd, testPath)
|
||||
filterFn := func(_, path string, _ os.FileInfo, _ error) error {
|
||||
if path == ignorePath {
|
||||
return ErrSkipPath
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
indexer := newFileIndexer(testPath, "", filterFn)
|
||||
_, _, err := indexer.build()
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
// - Verify ignores if filterAndIndex sets up a filter for the directory
|
||||
func Test_ignoresPathIfParentFiltered(t *testing.T) {
|
||||
testPath := "test-fixtures/system_paths/target/home/place"
|
||||
parentPath := "test-fixtures/system_paths/target/home"
|
||||
|
||||
cwd, cwdErr := os.Getwd()
|
||||
require.NoError(t, cwdErr)
|
||||
ignorePath := path.Join(cwd, parentPath)
|
||||
filterFn := func(_, path string, _ os.FileInfo, _ error) error {
|
||||
if path == ignorePath {
|
||||
return fs.SkipDir
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
indexer := newFileIndexer(testPath, "", filterFn)
|
||||
_, _, err := indexer.build()
|
||||
require.Error(t, err)
|
||||
}
|
||||
229
syft/internal/fileresolver/filetree_resolver.go
Normal file
229
syft/internal/fileresolver/filetree_resolver.go
Normal file
@ -0,0 +1,229 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
|
||||
"github.com/anchore/stereoscope/pkg/filetree"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/internal/windows"
|
||||
)
|
||||
|
||||
type filetreeResolver struct {
|
||||
chroot ChrootContext
|
||||
tree filetree.Reader
|
||||
index filetree.IndexReader
|
||||
searchContext filetree.Searcher
|
||||
}
|
||||
|
||||
func (r *filetreeResolver) requestPath(userPath string) (string, error) {
|
||||
return r.chroot.ToNativePath(userPath)
|
||||
}
|
||||
|
||||
// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the file resolver.
|
||||
func (r filetreeResolver) responsePath(path string) string {
|
||||
return r.chroot.ToChrootPath(path)
|
||||
}
|
||||
|
||||
// HasPath indicates if the given path exists in the underlying source.
|
||||
func (r *filetreeResolver) HasPath(userPath string) bool {
|
||||
requestPath, err := r.requestPath(userPath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return r.tree.HasPath(stereoscopeFile.Path(requestPath))
|
||||
}
|
||||
|
||||
// FilesByPath returns all file.References that match the given paths from the file index.
|
||||
func (r filetreeResolver) FilesByPath(userPaths ...string) ([]file.Location, error) {
|
||||
var references = make([]file.Location, 0)
|
||||
|
||||
for _, userPath := range userPaths {
|
||||
userStrPath, err := r.requestPath(userPath)
|
||||
if err != nil {
|
||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// we should be resolving symlinks and preserving this information as a AccessPath to the real file
|
||||
ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
|
||||
if err != nil {
|
||||
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if !ref.HasReference() {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, err := r.index.Get(*ref.Reference)
|
||||
if err != nil {
|
||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Metadata.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
if windows.HostRunningOnWindows() {
|
||||
userStrPath = windows.ToPosix(userStrPath)
|
||||
}
|
||||
|
||||
if ref.HasReference() {
|
||||
references = append(references,
|
||||
file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
|
||||
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
|
||||
*ref.Reference,
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return references, nil
|
||||
}
|
||||
|
||||
func (r filetreeResolver) requestGlob(pattern string) (string, error) {
|
||||
return r.chroot.ToNativeGlob(pattern)
|
||||
}
|
||||
|
||||
// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
|
||||
func (r filetreeResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||
uniqueLocations := make([]file.Location, 0)
|
||||
|
||||
for _, pattern := range patterns {
|
||||
requestGlob, err := r.requestGlob(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, refVia := range refVias {
|
||||
if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
|
||||
continue
|
||||
}
|
||||
entry, err := r.index.Get(*refVia.Reference)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Metadata.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
loc := file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
|
||||
r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root
|
||||
*refVia.Reference,
|
||||
)
|
||||
uniqueFileIDs.Add(*refVia.Reference)
|
||||
uniqueLocations = append(uniqueLocations, loc)
|
||||
}
|
||||
}
|
||||
|
||||
return uniqueLocations, nil
|
||||
}
|
||||
|
||||
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
|
||||
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
|
||||
func (r *filetreeResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||
paths, err := r.FilesByPath(path)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &paths[0]
|
||||
}
|
||||
|
||||
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
|
||||
// If the path does not exist an error is returned.
|
||||
func (r filetreeResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||
if location.RealPath == "" {
|
||||
return nil, errors.New("empty path given")
|
||||
}
|
||||
|
||||
entry, err := r.index.Get(location.Reference())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// don't consider directories
|
||||
if entry.Type == stereoscopeFile.TypeDirectory {
|
||||
return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
|
||||
}
|
||||
|
||||
// RealPath is posix so for windows file resolver we need to translate
|
||||
// to its true on disk path.
|
||||
filePath := string(location.Reference().RealPath)
|
||||
if windows.HostRunningOnWindows() {
|
||||
filePath = windows.FromPosix(filePath)
|
||||
}
|
||||
|
||||
return stereoscopeFile.NewLazyReadCloser(filePath), nil
|
||||
}
|
||||
|
||||
func (r *filetreeResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||
results := make(chan file.Location)
|
||||
go func() {
|
||||
defer close(results)
|
||||
for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref):
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
return results
|
||||
}
|
||||
|
||||
func (r *filetreeResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||
entry, err := r.index.Get(location.Reference())
|
||||
if err != nil {
|
||||
return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
|
||||
}
|
||||
|
||||
return entry.Metadata, nil
|
||||
}
|
||||
|
||||
func (r *filetreeResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||
uniqueLocations := make([]file.Location, 0)
|
||||
|
||||
refVias, err := r.searchContext.SearchByMIMEType(types...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, refVia := range refVias {
|
||||
if !refVia.HasReference() {
|
||||
continue
|
||||
}
|
||||
if uniqueFileIDs.Contains(*refVia.Reference) {
|
||||
continue
|
||||
}
|
||||
location := file.NewVirtualLocationFromDirectory(
|
||||
r.responsePath(string(refVia.Reference.RealPath)),
|
||||
r.responsePath(string(refVia.RequestPath)),
|
||||
*refVia.Reference,
|
||||
)
|
||||
uniqueFileIDs.Add(*refVia.Reference)
|
||||
uniqueLocations = append(uniqueLocations, location)
|
||||
}
|
||||
|
||||
return uniqueLocations, nil
|
||||
}
|
||||
1611
syft/internal/fileresolver/filetree_resolver_test.go
Normal file
1611
syft/internal/fileresolver/filetree_resolver_test.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,6 @@ package filesource
|
||||
import (
|
||||
"crypto"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
@ -36,7 +35,7 @@ type fileSource struct {
|
||||
id artifact.ID
|
||||
digestForVersion string
|
||||
config Config
|
||||
resolver *fileresolver.Directory
|
||||
resolver file.Resolver
|
||||
mutex *sync.Mutex
|
||||
closer func() error
|
||||
digests []file.Digest
|
||||
@ -165,48 +164,22 @@ func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var res *fileresolver.Directory
|
||||
if isArchiveAnalysis {
|
||||
// this is an analysis of an archive file... we should scan the directory where the archive contents
|
||||
res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
|
||||
res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
||||
}
|
||||
} else {
|
||||
// this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we
|
||||
// don't want to include any other files except this the given file.
|
||||
exclusionFunctions = append([]fileresolver.PathIndexVisitor{
|
||||
|
||||
// note: we should exclude these kinds of paths first before considering any other user-provided exclusions
|
||||
func(_, p string, _ os.FileInfo, _ error) error {
|
||||
if p == absParentDir {
|
||||
// this is the root directory... always include it
|
||||
return nil
|
||||
}
|
||||
|
||||
if filepath.Dir(p) != absParentDir {
|
||||
// we are no longer in the root directory containing the single file we want to scan...
|
||||
// we should skip the directory this path resides in entirely!
|
||||
return fs.SkipDir
|
||||
}
|
||||
|
||||
if filepath.Base(p) != filepath.Base(s.config.Path) {
|
||||
// we're in the root directory, but this is not the file we want to scan...
|
||||
// we should selectively skip this file (not the directory we're in).
|
||||
return fileresolver.ErrSkipPath
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}, exclusionFunctions...)
|
||||
|
||||
res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
s.resolver = res
|
||||
return s.resolver, nil
|
||||
}
|
||||
|
||||
// This is analysis of a single file. Use file indexer.
|
||||
res, err := fileresolver.NewFromFile(absParentDir, s.analysisPath, exclusionFunctions...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create file resolver: %w", err)
|
||||
}
|
||||
s.resolver = res
|
||||
return s.resolver, nil
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user