mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
Use file indexer directly when scanning with file source (#3333)
* Use file indexer when scanning with file source Prevents filesystem walks when scanning a single file, to optimise memory & scan times in case the scanned file lives in a directory containing many files. Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk> * Create filetree resolver Shared behaviour for resolving indexed filetrees. Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk> --------- Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk>
This commit is contained in:
parent
8abd97a5bf
commit
21df38798e
@ -1,17 +1,11 @@
|
|||||||
package fileresolver
|
package fileresolver
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
|
|
||||||
"github.com/anchore/stereoscope/pkg/filetree"
|
"github.com/anchore/stereoscope/pkg/filetree"
|
||||||
"github.com/anchore/syft/internal/log"
|
|
||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/internal/windows"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var ErrSkipPath = errors.New("skip path")
|
var ErrSkipPath = errors.New("skip path")
|
||||||
@ -20,12 +14,9 @@ var _ file.Resolver = (*Directory)(nil)
|
|||||||
|
|
||||||
// Directory implements path and content access for the directory data source.
|
// Directory implements path and content access for the directory data source.
|
||||||
type Directory struct {
|
type Directory struct {
|
||||||
path string
|
filetreeResolver
|
||||||
chroot ChrootContext
|
path string
|
||||||
tree filetree.Reader
|
indexer *directoryIndexer
|
||||||
index filetree.IndexReader
|
|
||||||
searchContext filetree.Searcher
|
|
||||||
indexer *directoryIndexer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
|
func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
|
||||||
@ -47,10 +38,12 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI
|
|||||||
cleanBase := chroot.Base()
|
cleanBase := chroot.Base()
|
||||||
|
|
||||||
return &Directory{
|
return &Directory{
|
||||||
path: cleanRoot,
|
path: cleanRoot,
|
||||||
chroot: *chroot,
|
filetreeResolver: filetreeResolver{
|
||||||
tree: filetree.New(),
|
chroot: *chroot,
|
||||||
index: filetree.NewIndex(),
|
tree: filetree.New(),
|
||||||
|
index: filetree.NewIndex(),
|
||||||
|
},
|
||||||
indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...),
|
indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
@ -66,220 +59,12 @@ func (r *Directory) buildIndex() error {
|
|||||||
|
|
||||||
r.tree = tree
|
r.tree = tree
|
||||||
r.index = index
|
r.index = index
|
||||||
r.searchContext = filetree.NewSearchContext(tree, index)
|
r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r Directory) requestPath(userPath string) (string, error) {
|
|
||||||
return r.chroot.ToNativePath(userPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver.
|
|
||||||
func (r Directory) responsePath(path string) string {
|
|
||||||
return r.chroot.ToChrootPath(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// HasPath indicates if the given path exists in the underlying source.
|
|
||||||
func (r *Directory) HasPath(userPath string) bool {
|
|
||||||
requestPath, err := r.requestPath(userPath)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return r.tree.HasPath(stereoscopeFile.Path(requestPath))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stringer to represent a directory path data source
|
// Stringer to represent a directory path data source
|
||||||
func (r Directory) String() string {
|
func (r Directory) String() string {
|
||||||
return fmt.Sprintf("dir:%s", r.path)
|
return fmt.Sprintf("dir:%s", r.path)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FilesByPath returns all file.References that match the given paths from the directory.
|
|
||||||
func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) {
|
|
||||||
var references = make([]file.Location, 0)
|
|
||||||
|
|
||||||
for _, userPath := range userPaths {
|
|
||||||
userStrPath, err := r.requestPath(userPath)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// we should be resolving symlinks and preserving this information as a AccessPath to the real file
|
|
||||||
ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
|
|
||||||
if err != nil {
|
|
||||||
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if !ref.HasReference() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
entry, err := r.index.Get(*ref.Reference)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't consider directories
|
|
||||||
if entry.Metadata.IsDir() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if windows.HostRunningOnWindows() {
|
|
||||||
userStrPath = windows.ToPosix(userStrPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
if ref.HasReference() {
|
|
||||||
references = append(references,
|
|
||||||
file.NewVirtualLocationFromDirectory(
|
|
||||||
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
|
|
||||||
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
|
|
||||||
*ref.Reference,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return references, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r Directory) requestGlob(pattern string) (string, error) {
|
|
||||||
return r.chroot.ToNativeGlob(pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
|
|
||||||
func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
|
||||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
|
||||||
uniqueLocations := make([]file.Location, 0)
|
|
||||||
|
|
||||||
for _, pattern := range patterns {
|
|
||||||
requestGlob, err := r.requestGlob(pattern)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
for _, refVia := range refVias {
|
|
||||||
if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
entry, err := r.index.Get(*refVia.Reference)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't consider directories
|
|
||||||
if entry.Metadata.IsDir() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
loc := file.NewVirtualLocationFromDirectory(
|
|
||||||
r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
|
|
||||||
r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root
|
|
||||||
*refVia.Reference,
|
|
||||||
)
|
|
||||||
uniqueFileIDs.Add(*refVia.Reference)
|
|
||||||
uniqueLocations = append(uniqueLocations, loc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return uniqueLocations, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
|
|
||||||
// This is helpful when attempting to find a file that is in the same layer or lower as another file. For the
|
|
||||||
// Directory, this is a simple path lookup.
|
|
||||||
func (r *Directory) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
|
||||||
paths, err := r.FilesByPath(path)
|
|
||||||
if err != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if len(paths) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return &paths[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
|
|
||||||
// If the path does not exist an error is returned.
|
|
||||||
func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
|
||||||
if location.RealPath == "" {
|
|
||||||
return nil, errors.New("empty path given")
|
|
||||||
}
|
|
||||||
|
|
||||||
entry, err := r.index.Get(location.Reference())
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't consider directories
|
|
||||||
if entry.Type == stereoscopeFile.TypeDirectory {
|
|
||||||
return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RealPath is posix so for windows directory resolver we need to translate
|
|
||||||
// to its true on disk path.
|
|
||||||
filePath := string(location.Reference().RealPath)
|
|
||||||
if windows.HostRunningOnWindows() {
|
|
||||||
filePath = windows.FromPosix(filePath)
|
|
||||||
}
|
|
||||||
|
|
||||||
return stereoscopeFile.NewLazyReadCloser(filePath), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *Directory) AllLocations(ctx context.Context) <-chan file.Location {
|
|
||||||
results := make(chan file.Location)
|
|
||||||
go func() {
|
|
||||||
defer close(results)
|
|
||||||
for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref):
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
return results
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *Directory) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
|
||||||
entry, err := r.index.Get(location.Reference())
|
|
||||||
if err != nil {
|
|
||||||
return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
|
|
||||||
}
|
|
||||||
|
|
||||||
return entry.Metadata, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
|
||||||
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
|
||||||
uniqueLocations := make([]file.Location, 0)
|
|
||||||
|
|
||||||
refVias, err := r.searchContext.SearchByMIMEType(types...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
for _, refVia := range refVias {
|
|
||||||
if !refVia.HasReference() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if uniqueFileIDs.Contains(*refVia.Reference) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
location := file.NewVirtualLocationFromDirectory(
|
|
||||||
r.responsePath(string(refVia.Reference.RealPath)),
|
|
||||||
r.responsePath(string(refVia.RequestPath)),
|
|
||||||
*refVia.Reference,
|
|
||||||
)
|
|
||||||
uniqueFileIDs.Add(*refVia.Reference)
|
|
||||||
uniqueLocations = append(uniqueLocations, location)
|
|
||||||
}
|
|
||||||
|
|
||||||
return uniqueLocations, nil
|
|
||||||
}
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
62
syft/internal/fileresolver/file.go
Normal file
62
syft/internal/fileresolver/file.go
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/filetree"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Compile time assurance that we meet the Resolver interface.
|
||||||
|
var _ file.Resolver = (*File)(nil)
|
||||||
|
|
||||||
|
// File implements path and content access for the file data source.
|
||||||
|
type File struct {
|
||||||
|
filetreeResolver
|
||||||
|
path string
|
||||||
|
indexer *fileIndexer
|
||||||
|
}
|
||||||
|
|
||||||
|
// parent should be the symlink free absolute path to the parent directory
|
||||||
|
// path is the filepath of the file we're creating content access for
|
||||||
|
func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, error) {
|
||||||
|
chroot, err := NewChrootContextFromCWD(parent, parent)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to interpret chroot context: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanBase := chroot.Base()
|
||||||
|
|
||||||
|
file := &File{
|
||||||
|
path: path,
|
||||||
|
filetreeResolver: filetreeResolver{
|
||||||
|
chroot: *chroot,
|
||||||
|
tree: filetree.New(),
|
||||||
|
index: filetree.NewIndex(),
|
||||||
|
},
|
||||||
|
indexer: newFileIndexer(path, cleanBase, pathFilters...),
|
||||||
|
}
|
||||||
|
|
||||||
|
return file, file.buildIndex()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *File) buildIndex() error {
|
||||||
|
if r.indexer == nil {
|
||||||
|
return fmt.Errorf("no file indexer configured")
|
||||||
|
}
|
||||||
|
tree, index, err := r.indexer.build()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.tree = tree
|
||||||
|
r.index = index
|
||||||
|
r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stringer to represent a file path data source
|
||||||
|
func (r File) String() string {
|
||||||
|
return fmt.Sprintf("file:%s", r.path)
|
||||||
|
}
|
||||||
223
syft/internal/fileresolver/file_indexer.go
Normal file
223
syft/internal/fileresolver/file_indexer.go
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/file"
|
||||||
|
"github.com/anchore/stereoscope/pkg/filetree"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/internal/windows"
|
||||||
|
"github.com/wagoodman/go-progress"
|
||||||
|
)
|
||||||
|
|
||||||
|
type fileIndexer struct {
|
||||||
|
path string
|
||||||
|
base string
|
||||||
|
pathIndexVisitors []PathIndexVisitor
|
||||||
|
errPaths map[string]error
|
||||||
|
tree filetree.ReadWriter
|
||||||
|
index filetree.Index
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer {
|
||||||
|
i := &fileIndexer{
|
||||||
|
path: path,
|
||||||
|
base: base,
|
||||||
|
tree: filetree.New(),
|
||||||
|
index: filetree.NewIndex(),
|
||||||
|
pathIndexVisitors: append(
|
||||||
|
[]PathIndexVisitor{
|
||||||
|
requireFileInfo,
|
||||||
|
disallowByFileType,
|
||||||
|
skipPathsByMountTypeAndName(path),
|
||||||
|
},
|
||||||
|
visitors...,
|
||||||
|
),
|
||||||
|
errPaths: make(map[string]error),
|
||||||
|
}
|
||||||
|
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the indexer
|
||||||
|
func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
|
||||||
|
return r.tree, r.index, index(r.path, r.indexPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index file at the given path
|
||||||
|
// A file indexer simply indexes the file and its directory.
|
||||||
|
func index(path string, indexer func(string, *progress.Stage) error) error {
|
||||||
|
// We want to index the file at the provided path and its parent directory.
|
||||||
|
// We need to probably check that we have file access
|
||||||
|
// We also need to determine what to do when the file itself is a symlink.
|
||||||
|
stager, prog := indexingProgress(path)
|
||||||
|
defer prog.SetCompleted()
|
||||||
|
|
||||||
|
err := indexer(path, stager)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to index filesystem path=%q: %w", path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// indexPath will index the file at the provided path as well as its parent directory.
|
||||||
|
// It expects path to be a file, not a directory.
|
||||||
|
// If a directory is provided then an error will be returned. Additionally, any IO or
|
||||||
|
// permissions errors on the file at path or its parent directory will return an error.
|
||||||
|
// Filter functions provided to the indexer are honoured, so if the path provided (or its parent
|
||||||
|
// directory) is filtered by a filter function, an error is returned.
|
||||||
|
func (r *fileIndexer) indexPath(path string, stager *progress.Stage) error {
|
||||||
|
log.WithFields("path", path).Trace("indexing file path")
|
||||||
|
|
||||||
|
absPath, err := filepath.Abs(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Protect against callers trying to call file_indexer with directories
|
||||||
|
fi, err := os.Stat(absPath)
|
||||||
|
// The directory indexer ignores stat errors, however this file indexer won't ignore them
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to stat path=%q: %w", path, err)
|
||||||
|
}
|
||||||
|
if fi.IsDir() {
|
||||||
|
return fmt.Errorf("unable to index file, given path was a directory=%q", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
absSymlinkFreeFilePath, err := absoluteSymlinkFreePathToFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now index the file and its parent directory
|
||||||
|
// We try to index the parent directory first, because if the parent directory
|
||||||
|
// is ignored by any filter function, then we must ensure we also ignore the file.
|
||||||
|
absSymlinkFreeParent, err := absoluteSymlinkFreePathToParent(absSymlinkFreeFilePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
parentFi, err := os.Stat(absSymlinkFreeParent)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unable to stat parent of file=%q: %w", absSymlinkFreeParent, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
stager.Current = absSymlinkFreeParent
|
||||||
|
indexParentErr := r.filterAndIndex(absSymlinkFreeParent, parentFi)
|
||||||
|
if indexParentErr != nil {
|
||||||
|
return indexParentErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// We have indexed the parent successfully, now attempt to index the file.
|
||||||
|
stager.Current = absSymlinkFreeFilePath
|
||||||
|
indexFileErr := r.filterAndIndex(absSymlinkFreeFilePath, fi)
|
||||||
|
if indexFileErr != nil {
|
||||||
|
return indexFileErr
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error {
|
||||||
|
// check if any of the filters want us to ignore this path
|
||||||
|
for _, filterFn := range r.pathIndexVisitors {
|
||||||
|
if filterFn == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if filterErr := filterFn(r.base, path, info, nil); filterErr != nil {
|
||||||
|
// A filter function wants us to ignore this path, honour it
|
||||||
|
return filterErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// here we check to see if we need to normalize paths to posix on the way in coming from windows
|
||||||
|
if windows.HostRunningOnWindows() {
|
||||||
|
path = windows.ToPosix(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
err := r.addPathToIndex(path, info)
|
||||||
|
// If we hit file access errors, isFileAccessErr will handle logging & adding
|
||||||
|
// the path to the errPaths map.
|
||||||
|
// While the directory_indexer does not let these cause the indexer to throw
|
||||||
|
// we will here, as not having access to the file we index for a file source
|
||||||
|
// probably makes the file source creation useless? I need to check with Syft maintainers.
|
||||||
|
// This also poses the question, is errPaths worthwhile for file_indexer?
|
||||||
|
if r.isFileAccessErr(path, err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add path to index. File indexer doesn't need to support symlink, as we should have abs symlink free path.
|
||||||
|
// If we somehow get a symlink here, report as an error.
|
||||||
|
func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error {
|
||||||
|
switch t := file.TypeFromMode(info.Mode()); t {
|
||||||
|
case file.TypeDirectory:
|
||||||
|
return r.addDirectoryToIndex(path, info)
|
||||||
|
case file.TypeRegular:
|
||||||
|
return r.addFileToIndex(path, info)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unsupported file type: %s", t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error {
|
||||||
|
ref, err := r.tree.AddDir(file.Path(path))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata := file.NewMetadataFromPath(path, info)
|
||||||
|
r.index.Add(*ref, metadata)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error {
|
||||||
|
ref, err := r.tree.AddFile(file.Path(path))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata := file.NewMetadataFromPath(path, info)
|
||||||
|
r.index.Add(*ref, metadata)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get absolute symlink free path to parent of the file
|
||||||
|
func absoluteSymlinkFreePathToParent(path string) (string, error) {
|
||||||
|
absFilePath, err := absoluteSymlinkFreePathToFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Dir(absFilePath), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// absoluteSymlinkFreePathToFile returns the absolute form of path with all
// symlinks evaluated.
//
// An error is returned when path cannot be made absolute or when its symlinks
// cannot be evaluated (for example, when the path does not exist). Each
// failure is reported with its own message so the failing step is clear; the
// underlying error is wrapped in both cases.
func absoluteSymlinkFreePathToFile(path string) (string, error) {
	absAnalysisPath, err := filepath.Abs(path)
	if err != nil {
		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
	}

	dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
	if err != nil {
		return "", fmt.Errorf("unable to evaluate symlinks for analysis path=%q: %w", path, err)
	}

	return dereferencedAbsAnalysisPath, nil
}
|
||||||
|
|
||||||
|
func (r *fileIndexer) isFileAccessErr(path string, err error) bool {
|
||||||
|
// don't allow for errors to stop indexing, keep track of the paths and continue.
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("unable to access path=%q: %+v", path, err)
|
||||||
|
r.errPaths[path] = err
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
103
syft/internal/fileresolver/file_indexer_test.go
Normal file
103
syft/internal/fileresolver/file_indexer_test.go
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/anchore/stereoscope/pkg/file"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// - Verify that both the parent and the path are indexed
|
||||||
|
func Test_index(t *testing.T) {
|
||||||
|
testPath := "test-fixtures/system_paths/target/home/place"
|
||||||
|
indexer := newFileIndexer(testPath, "", make([]PathIndexVisitor, 0)...)
|
||||||
|
tree, index, err := indexer.build()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
path string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "has path",
|
||||||
|
path: "test-fixtures/system_paths/target/home/place",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "has parent dir",
|
||||||
|
path: "test-fixtures/system_paths/target/home",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
info, err := os.Stat(test.path)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// note: the index uses absolute paths, so assertions MUST keep this in mind
|
||||||
|
cwd, err := os.Getwd()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
p := file.Path(path.Join(cwd, test.path))
|
||||||
|
assert.Equal(t, true, tree.HasPath(p))
|
||||||
|
exists, ref, err := tree.File(p)
|
||||||
|
assert.Equal(t, true, exists)
|
||||||
|
if assert.NoError(t, err) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
entry, err := index.Get(*ref.Reference)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, info.Mode(), entry.Mode)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// - Verify that directories are rejected
|
||||||
|
func Test_indexRejectsDirectory(t *testing.T) {
|
||||||
|
dirPath := "test-fixtures/system_paths/target/home"
|
||||||
|
indexer := newFileIndexer(dirPath, "", make([]PathIndexVisitor, 0)...)
|
||||||
|
_, _, err := indexer.build()
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// - Verify ignores if filterAndIndex sets up a filter for the filepath
|
||||||
|
func Test_ignoresPathIfFiltered(t *testing.T) {
|
||||||
|
testPath := "test-fixtures/system_paths/target/home/place"
|
||||||
|
cwd, cwdErr := os.Getwd()
|
||||||
|
require.NoError(t, cwdErr)
|
||||||
|
ignorePath := path.Join(cwd, testPath)
|
||||||
|
filterFn := func(_, path string, _ os.FileInfo, _ error) error {
|
||||||
|
if path == ignorePath {
|
||||||
|
return ErrSkipPath
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
indexer := newFileIndexer(testPath, "", filterFn)
|
||||||
|
_, _, err := indexer.build()
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// - Verify ignores if filterAndIndex sets up a filter for the directory
|
||||||
|
func Test_ignoresPathIfParentFiltered(t *testing.T) {
|
||||||
|
testPath := "test-fixtures/system_paths/target/home/place"
|
||||||
|
parentPath := "test-fixtures/system_paths/target/home"
|
||||||
|
|
||||||
|
cwd, cwdErr := os.Getwd()
|
||||||
|
require.NoError(t, cwdErr)
|
||||||
|
ignorePath := path.Join(cwd, parentPath)
|
||||||
|
filterFn := func(_, path string, _ os.FileInfo, _ error) error {
|
||||||
|
if path == ignorePath {
|
||||||
|
return fs.SkipDir
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
indexer := newFileIndexer(testPath, "", filterFn)
|
||||||
|
_, _, err := indexer.build()
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
229
syft/internal/fileresolver/filetree_resolver.go
Normal file
229
syft/internal/fileresolver/filetree_resolver.go
Normal file
@ -0,0 +1,229 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
|
||||||
|
"github.com/anchore/stereoscope/pkg/filetree"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/internal/windows"
|
||||||
|
)
|
||||||
|
|
||||||
|
type filetreeResolver struct {
|
||||||
|
chroot ChrootContext
|
||||||
|
tree filetree.Reader
|
||||||
|
index filetree.IndexReader
|
||||||
|
searchContext filetree.Searcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *filetreeResolver) requestPath(userPath string) (string, error) {
|
||||||
|
return r.chroot.ToNativePath(userPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the file resolver.
|
||||||
|
func (r filetreeResolver) responsePath(path string) string {
|
||||||
|
return r.chroot.ToChrootPath(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasPath indicates if the given path exists in the underlying source.
|
||||||
|
func (r *filetreeResolver) HasPath(userPath string) bool {
|
||||||
|
requestPath, err := r.requestPath(userPath)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return r.tree.HasPath(stereoscopeFile.Path(requestPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByPath returns all file.References that match the given paths from the file index.
|
||||||
|
func (r filetreeResolver) FilesByPath(userPaths ...string) ([]file.Location, error) {
|
||||||
|
var references = make([]file.Location, 0)
|
||||||
|
|
||||||
|
for _, userPath := range userPaths {
|
||||||
|
userStrPath, err := r.requestPath(userPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// we should be resolving symlinks and preserving this information as a AccessPath to the real file
|
||||||
|
ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
|
||||||
|
if err != nil {
|
||||||
|
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ref.HasReference() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
entry, err := r.index.Get(*ref.Reference)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// don't consider directories
|
||||||
|
if entry.Metadata.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if windows.HostRunningOnWindows() {
|
||||||
|
userStrPath = windows.ToPosix(userStrPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ref.HasReference() {
|
||||||
|
references = append(references,
|
||||||
|
file.NewVirtualLocationFromDirectory(
|
||||||
|
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
|
||||||
|
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
|
||||||
|
*ref.Reference,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return references, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r filetreeResolver) requestGlob(pattern string) (string, error) {
|
||||||
|
return r.chroot.ToNativeGlob(pattern)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
|
||||||
|
func (r filetreeResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||||
|
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||||
|
uniqueLocations := make([]file.Location, 0)
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
requestGlob, err := r.requestGlob(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, refVia := range refVias {
|
||||||
|
if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entry, err := r.index.Get(*refVia.Reference)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// don't consider directories
|
||||||
|
if entry.Metadata.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
loc := file.NewVirtualLocationFromDirectory(
|
||||||
|
r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
|
||||||
|
r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root
|
||||||
|
*refVia.Reference,
|
||||||
|
)
|
||||||
|
uniqueFileIDs.Add(*refVia.Reference)
|
||||||
|
uniqueLocations = append(uniqueLocations, loc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return uniqueLocations, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
|
||||||
|
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
|
||||||
|
func (r *filetreeResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||||
|
paths, err := r.FilesByPath(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if len(paths) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &paths[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
|
||||||
|
// If the path does not exist an error is returned.
|
||||||
|
func (r filetreeResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||||
|
if location.RealPath == "" {
|
||||||
|
return nil, errors.New("empty path given")
|
||||||
|
}
|
||||||
|
|
||||||
|
entry, err := r.index.Get(location.Reference())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// don't consider directories
|
||||||
|
if entry.Type == stereoscopeFile.TypeDirectory {
|
||||||
|
return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RealPath is posix so for windows file resolver we need to translate
|
||||||
|
// to its true on disk path.
|
||||||
|
filePath := string(location.Reference().RealPath)
|
||||||
|
if windows.HostRunningOnWindows() {
|
||||||
|
filePath = windows.FromPosix(filePath)
|
||||||
|
}
|
||||||
|
|
||||||
|
return stereoscopeFile.NewLazyReadCloser(filePath), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *filetreeResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||||
|
results := make(chan file.Location)
|
||||||
|
go func() {
|
||||||
|
defer close(results)
|
||||||
|
for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref):
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *filetreeResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||||
|
entry, err := r.index.Get(location.Reference())
|
||||||
|
if err != nil {
|
||||||
|
return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
|
||||||
|
}
|
||||||
|
|
||||||
|
return entry.Metadata, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *filetreeResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||||
|
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
|
||||||
|
uniqueLocations := make([]file.Location, 0)
|
||||||
|
|
||||||
|
refVias, err := r.searchContext.SearchByMIMEType(types...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, refVia := range refVias {
|
||||||
|
if !refVia.HasReference() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if uniqueFileIDs.Contains(*refVia.Reference) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
location := file.NewVirtualLocationFromDirectory(
|
||||||
|
r.responsePath(string(refVia.Reference.RealPath)),
|
||||||
|
r.responsePath(string(refVia.RequestPath)),
|
||||||
|
*refVia.Reference,
|
||||||
|
)
|
||||||
|
uniqueFileIDs.Add(*refVia.Reference)
|
||||||
|
uniqueLocations = append(uniqueLocations, location)
|
||||||
|
}
|
||||||
|
|
||||||
|
return uniqueLocations, nil
|
||||||
|
}
|
||||||
1611
syft/internal/fileresolver/filetree_resolver_test.go
Normal file
1611
syft/internal/fileresolver/filetree_resolver_test.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,6 @@ package filesource
|
|||||||
import (
|
import (
|
||||||
"crypto"
|
"crypto"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/fs"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -36,7 +35,7 @@ type fileSource struct {
|
|||||||
id artifact.ID
|
id artifact.ID
|
||||||
digestForVersion string
|
digestForVersion string
|
||||||
config Config
|
config Config
|
||||||
resolver *fileresolver.Directory
|
resolver file.Resolver
|
||||||
mutex *sync.Mutex
|
mutex *sync.Mutex
|
||||||
closer func() error
|
closer func() error
|
||||||
digests []file.Digest
|
digests []file.Digest
|
||||||
@ -165,48 +164,22 @@ func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var res *fileresolver.Directory
|
|
||||||
if isArchiveAnalysis {
|
if isArchiveAnalysis {
|
||||||
// this is an analysis of an archive file... we should scan the directory where the archive contents
|
// this is an analysis of an archive file... we should scan the directory where the archive contents
|
||||||
res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
|
res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we
|
|
||||||
// don't want to include any other files except this the given file.
|
|
||||||
exclusionFunctions = append([]fileresolver.PathIndexVisitor{
|
|
||||||
|
|
||||||
// note: we should exclude these kinds of paths first before considering any other user-provided exclusions
|
|
||||||
func(_, p string, _ os.FileInfo, _ error) error {
|
|
||||||
if p == absParentDir {
|
|
||||||
// this is the root directory... always include it
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if filepath.Dir(p) != absParentDir {
|
|
||||||
// we are no longer in the root directory containing the single file we want to scan...
|
|
||||||
// we should skip the directory this path resides in entirely!
|
|
||||||
return fs.SkipDir
|
|
||||||
}
|
|
||||||
|
|
||||||
if filepath.Base(p) != filepath.Base(s.config.Path) {
|
|
||||||
// we're in the root directory, but this is not the file we want to scan...
|
|
||||||
// we should selectively skip this file (not the directory we're in).
|
|
||||||
return fileresolver.ErrSkipPath
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
}, exclusionFunctions...)
|
|
||||||
|
|
||||||
res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
||||||
}
|
}
|
||||||
|
s.resolver = res
|
||||||
|
return s.resolver, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is analysis of a single file. Use file indexer.
|
||||||
|
res, err := fileresolver.NewFromFile(absParentDir, s.analysisPath, exclusionFunctions...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to create file resolver: %w", err)
|
||||||
|
}
|
||||||
s.resolver = res
|
s.resolver = res
|
||||||
|
|
||||||
return s.resolver, nil
|
return s.resolver, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user