Use file indexer directly when scanning with file source (#3333)

* Use file indexer when scanning with file source

Prevents filesystem walks when scanning a single file, to
optimise memory & scan times in case the scanned file
lives in a directory containing many files.

Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk>

* Create filetree resolver

Shared behaviour for resolving indexed filetrees.

Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk>

---------

Signed-off-by: adammcclenaghan <adam@mcclenaghan.co.uk>
This commit is contained in:
Adam McClenaghan 2024-11-22 16:53:53 +00:00 committed by GitHub
parent 8abd97a5bf
commit 21df38798e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 2247 additions and 1609 deletions

View File

@ -1,17 +1,11 @@
package fileresolver package fileresolver
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"io"
"os"
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree" "github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/windows"
) )
var ErrSkipPath = errors.New("skip path") var ErrSkipPath = errors.New("skip path")
@ -20,12 +14,9 @@ var _ file.Resolver = (*Directory)(nil)
// Directory implements path and content access for the directory data source. // Directory implements path and content access for the directory data source.
type Directory struct { type Directory struct {
path string filetreeResolver
chroot ChrootContext path string
tree filetree.Reader indexer *directoryIndexer
index filetree.IndexReader
searchContext filetree.Searcher
indexer *directoryIndexer
} }
func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
@ -47,10 +38,12 @@ func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathI
cleanBase := chroot.Base() cleanBase := chroot.Base()
return &Directory{ return &Directory{
path: cleanRoot, path: cleanRoot,
chroot: *chroot, filetreeResolver: filetreeResolver{
tree: filetree.New(), chroot: *chroot,
index: filetree.NewIndex(), tree: filetree.New(),
index: filetree.NewIndex(),
},
indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...), indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...),
}, nil }, nil
} }
@ -66,220 +59,12 @@ func (r *Directory) buildIndex() error {
r.tree = tree r.tree = tree
r.index = index r.index = index
r.searchContext = filetree.NewSearchContext(tree, index) r.filetreeResolver.searchContext = filetree.NewSearchContext(tree, index)
return nil return nil
} }
func (r Directory) requestPath(userPath string) (string, error) {
return r.chroot.ToNativePath(userPath)
}
// responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver.
func (r Directory) responsePath(path string) string {
return r.chroot.ToChrootPath(path)
}
// HasPath indicates if the given path exists in the underlying source.
func (r *Directory) HasPath(userPath string) bool {
requestPath, err := r.requestPath(userPath)
if err != nil {
return false
}
return r.tree.HasPath(stereoscopeFile.Path(requestPath))
}
// Stringer to represent a directory path data source // Stringer to represent a directory path data source
func (r Directory) String() string { func (r Directory) String() string {
return fmt.Sprintf("dir:%s", r.path) return fmt.Sprintf("dir:%s", r.path)
} }
// FilesByPath returns all file.References that match the given paths from the directory.
func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) {
var references = make([]file.Location, 0)
for _, userPath := range userPaths {
userStrPath, err := r.requestPath(userPath)
if err != nil {
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
continue
}
// we should be resolving symlinks and preserving this information as a AccessPath to the real file
ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
if err != nil {
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
continue
}
if !ref.HasReference() {
continue
}
entry, err := r.index.Get(*ref.Reference)
if err != nil {
log.Warnf("unable to get file by path=%q : %+v", userPath, err)
continue
}
// don't consider directories
if entry.Metadata.IsDir() {
continue
}
if windows.HostRunningOnWindows() {
userStrPath = windows.ToPosix(userStrPath)
}
if ref.HasReference() {
references = append(references,
file.NewVirtualLocationFromDirectory(
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
*ref.Reference,
),
)
}
}
return references, nil
}
func (r Directory) requestGlob(pattern string) (string, error) {
return r.chroot.ToNativeGlob(pattern)
}
// FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) {
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
uniqueLocations := make([]file.Location, 0)
for _, pattern := range patterns {
requestGlob, err := r.requestGlob(pattern)
if err != nil {
return nil, err
}
refVias, err := r.searchContext.SearchByGlob(requestGlob, filetree.FollowBasenameLinks)
if err != nil {
return nil, err
}
for _, refVia := range refVias {
if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
continue
}
entry, err := r.index.Get(*refVia.Reference)
if err != nil {
return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
}
// don't consider directories
if entry.Metadata.IsDir() {
continue
}
loc := file.NewVirtualLocationFromDirectory(
r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root
*refVia.Reference,
)
uniqueFileIDs.Add(*refVia.Reference)
uniqueLocations = append(uniqueLocations, loc)
}
}
return uniqueLocations, nil
}
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file. For the
// Directory, this is a simple path lookup.
func (r *Directory) RelativeFileByPath(_ file.Location, path string) *file.Location {
paths, err := r.FilesByPath(path)
if err != nil {
return nil
}
if len(paths) == 0 {
return nil
}
return &paths[0]
}
// FileContentsByLocation fetches file contents for a single file reference relative to a directory.
// If the path does not exist an error is returned.
func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
if location.RealPath == "" {
return nil, errors.New("empty path given")
}
entry, err := r.index.Get(location.Reference())
if err != nil {
return nil, err
}
// don't consider directories
if entry.Type == stereoscopeFile.TypeDirectory {
return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
}
// RealPath is posix so for windows directory resolver we need to translate
// to its true on disk path.
filePath := string(location.Reference().RealPath)
if windows.HostRunningOnWindows() {
filePath = windows.FromPosix(filePath)
}
return stereoscopeFile.NewLazyReadCloser(filePath), nil
}
func (r *Directory) AllLocations(ctx context.Context) <-chan file.Location {
results := make(chan file.Location)
go func() {
defer close(results)
for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
select {
case <-ctx.Done():
return
case results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref):
continue
}
}
}()
return results
}
func (r *Directory) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
entry, err := r.index.Get(location.Reference())
if err != nil {
return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
}
return entry.Metadata, nil
}
func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) {
uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
uniqueLocations := make([]file.Location, 0)
refVias, err := r.searchContext.SearchByMIMEType(types...)
if err != nil {
return nil, err
}
for _, refVia := range refVias {
if !refVia.HasReference() {
continue
}
if uniqueFileIDs.Contains(*refVia.Reference) {
continue
}
location := file.NewVirtualLocationFromDirectory(
r.responsePath(string(refVia.Reference.RealPath)),
r.responsePath(string(refVia.RequestPath)),
*refVia.Reference,
)
uniqueFileIDs.Add(*refVia.Reference)
uniqueLocations = append(uniqueLocations, location)
}
return uniqueLocations, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,62 @@
package fileresolver
import (
"fmt"
"github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/syft/file"
)
// Compile-time assertion that *File satisfies the file.Resolver interface.
var _ file.Resolver = (*File)(nil)
// File implements path and content access for the file data source.
// Its lookup methods are provided by the embedded filetreeResolver.
type File struct {
// filetreeResolver carries the chroot context, tree, index and search context.
filetreeResolver
// path is the file this resolver was created for; String reports it.
path string
// indexer produces the tree and index that buildIndex installs on the resolver.
indexer *fileIndexer
}
// NewFromFile creates a File resolver for the single file at path and
// indexes it immediately.
//
// parent should be the symlink-free absolute path to the parent directory of
// the file; it is passed for both arguments of NewChrootContextFromCWD.
// path is the file path to provide content access for. pathFilters are
// forwarded to the file indexer.
//
// An error is returned when the chroot context cannot be created (nil
// resolver) or when building the index fails (the resolver is still returned
// alongside that error).
func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, error) {
	chroot, err := NewChrootContextFromCWD(parent, parent)
	if err != nil {
		return nil, fmt.Errorf("unable to interpret chroot context: %w", err)
	}

	resolver := &File{
		filetreeResolver: filetreeResolver{
			chroot: *chroot,
			tree:   filetree.New(),
			index:  filetree.NewIndex(),
		},
		path:    path,
		indexer: newFileIndexer(path, chroot.Base(), pathFilters...),
	}

	indexErr := resolver.buildIndex()
	return resolver, indexErr
}
// buildIndex runs the configured file indexer and installs the resulting
// tree and index on the resolver, together with a search context created
// from them. It fails when no indexer is configured or when the indexer
// reports an error; in both cases the resolver state is left untouched.
func (r *File) buildIndex() error {
	if r.indexer == nil {
		return fmt.Errorf("no file indexer configured")
	}

	builtTree, builtIndex, err := r.indexer.build()
	if err != nil {
		return err
	}

	r.tree, r.index = builtTree, builtIndex
	r.filetreeResolver.searchContext = filetree.NewSearchContext(builtTree, builtIndex)
	return nil
}
// String renders the file data source as "file:<path>".
func (r File) String() string {
	return "file:" + r.path
}

View File

@ -0,0 +1,223 @@
package fileresolver
import (
"fmt"
"os"
"path/filepath"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/internal/windows"
"github.com/wagoodman/go-progress"
)
// fileIndexer indexes a single file, together with its parent directory,
// into a filetree and the accompanying index.
type fileIndexer struct {
// path is the file to index; build hands it to indexPath.
path string
// base is passed to every path index visitor as its first argument.
base string
// pathIndexVisitors are the filter functions consulted before a path is indexed.
pathIndexVisitors []PathIndexVisitor
// errPaths records, per path, the error hit while adding that path to the index.
errPaths map[string]error
// tree receives the directory and file nodes added during indexing.
tree filetree.ReadWriter
// index receives the metadata for each reference added to tree.
index filetree.Index
}
// newFileIndexer creates an indexer for the file at path with an empty tree,
// index and error-path map. Three built-in visitors (requireFileInfo,
// disallowByFileType and skipPathsByMountTypeAndName(path)) are always
// consulted first, followed by the caller-supplied visitors in order.
func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer {
	builtin := []PathIndexVisitor{
		requireFileInfo,
		disallowByFileType,
		skipPathsByMountTypeAndName(path),
	}

	return &fileIndexer{
		path:              path,
		base:              base,
		tree:              filetree.New(),
		index:             filetree.NewIndex(),
		pathIndexVisitors: append(builtin, visitors...),
		errPaths:          make(map[string]error),
	}
}
// build indexes the configured path and returns the indexer's tree and index
// along with any indexing error. The tree and index are returned even when
// indexing failed.
func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
	err := index(r.path, r.indexPath)
	return r.tree, r.index, err
}
// index drives the indexing of the file at path. It sets up progress
// reporting via indexingProgress, invokes indexer with the path and the
// progress stage, and marks the progress as completed on return regardless
// of the outcome. Any error from indexer is wrapped with the path.
func index(path string, indexer func(string, *progress.Stage) error) error {
	stage, tracker := indexingProgress(path)
	defer tracker.SetCompleted()

	if err := indexer(path, stage); err != nil {
		return fmt.Errorf("unable to index filesystem path=%q: %w", path, err)
	}
	return nil
}
// indexPath indexes the file at path and the directory that contains it.
//
// path must refer to a file; a directory is rejected with an error. Unlike
// the directory indexer, stat failures on the file or on its parent are
// returned rather than ignored. The parent directory is indexed before the
// file so that a filter which rejects the parent also prevents the file from
// being indexed; a rejection of either path by a filter is returned as an
// error. stager.Current is updated to the path being indexed at each step.
func (r *fileIndexer) indexPath(path string, stager *progress.Stage) error {
	log.WithFields("path", path).Trace("indexing file path")

	absolute, err := filepath.Abs(path)
	if err != nil {
		return err
	}

	// os.Stat follows symlinks, so this describes the file the path resolves to.
	fileInfo, err := os.Stat(absolute)
	if err != nil {
		return fmt.Errorf("unable to stat path=%q: %w", path, err)
	}
	// Guard against callers handing the file indexer a directory.
	if fileInfo.IsDir() {
		return fmt.Errorf("unable to index file, given path was a directory=%q", path)
	}

	resolvedFile, err := absoluteSymlinkFreePathToFile(path)
	if err != nil {
		return err
	}

	resolvedParent, err := absoluteSymlinkFreePathToParent(resolvedFile)
	if err != nil {
		return err
	}

	parentInfo, err := os.Stat(resolvedParent)
	if err != nil {
		return fmt.Errorf("unable to stat parent of file=%q: %w", resolvedParent, err)
	}

	// Parent first: if it is filtered out, the file must not be indexed either.
	stager.Current = resolvedParent
	if err := r.filterAndIndex(resolvedParent, parentInfo); err != nil {
		return err
	}

	stager.Current = resolvedFile
	return r.filterAndIndex(resolvedFile, fileInfo)
}
// filterAndIndex consults every configured path index visitor for path and,
// if none objects, adds the path to the index.
//
// The first non-nil error returned by a visitor is returned unchanged and
// the path is not indexed. On Windows hosts the path is converted to its
// posix form before it is added. A failure while adding the path is logged
// and recorded in errPaths by isFileAccessErr and then returned: unlike the
// directory indexer, the file indexer treats such failures as fatal.
func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error {
	for _, visit := range r.pathIndexVisitors {
		if visit == nil {
			continue
		}
		if err := visit(r.base, path, info, nil); err != nil {
			// a filter wants this path ignored; honour it
			return err
		}
	}

	// normalize to posix on the way in when running on windows
	if windows.HostRunningOnWindows() {
		path = windows.ToPosix(path)
	}

	if err := r.addPathToIndex(path, info); r.isFileAccessErr(path, err) {
		return err
	}
	return nil
}
// addPathToIndex adds path to the index according to the file type derived
// from info's mode. Only directories and regular files are supported; any
// other type (including symlinks, which are not expected because callers
// pass symlink-free paths) results in an error.
func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error {
	fileType := file.TypeFromMode(info.Mode())

	if fileType == file.TypeDirectory {
		return r.addDirectoryToIndex(path, info)
	}
	if fileType == file.TypeRegular {
		return r.addFileToIndex(path, info)
	}
	return fmt.Errorf("unsupported file type: %s", fileType)
}
// addDirectoryToIndex adds path to the tree as a directory and stores the
// metadata built from path and info in the index under the new reference.
// An error from the tree is returned as-is and nothing is added to the index.
func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error {
	dirRef, err := r.tree.AddDir(file.Path(path))
	if err != nil {
		return err
	}

	r.index.Add(*dirRef, file.NewMetadataFromPath(path, info))
	return nil
}
// addFileToIndex adds path to the tree as a file and stores the metadata
// built from path and info in the index under the new reference.
// An error from the tree is returned as-is and nothing is added to the index.
func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error {
	fileRef, err := r.tree.AddFile(file.Path(path))
	if err != nil {
		return err
	}

	r.index.Add(*fileRef, file.NewMetadataFromPath(path, info))
	return nil
}
// absoluteSymlinkFreePathToParent returns the directory portion of the
// absolute, symlink-free form of path. Errors from resolving path are
// returned unchanged with an empty result.
func absoluteSymlinkFreePathToParent(path string) (string, error) {
	resolved, err := absoluteSymlinkFreePathToFile(path)
	if err != nil {
		return "", err
	}

	parent := filepath.Dir(resolved)
	return parent, nil
}
// absoluteSymlinkFreePathToFile returns the absolute form of path with all
// symbolic links evaluated.
//
// An error is returned when the absolute path cannot be determined or when
// the symlinks cannot be evaluated (for example because the path does not
// exist). The two failures carry distinct messages so they can be told
// apart, and both wrap the underlying error.
func absoluteSymlinkFreePathToFile(path string) (string, error) {
	absAnalysisPath, err := filepath.Abs(path)
	if err != nil {
		return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
	}

	dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
	if err != nil {
		return "", fmt.Errorf("unable to evaluate symlinks for analysis path=%q: %w", path, err)
	}

	return dereferencedAbsAnalysisPath, nil
}
// isFileAccessErr reports whether err is non-nil. When it is, a warning is
// logged and the error is recorded in errPaths under path. Whether indexing
// stops is decided by the caller.
func (r *fileIndexer) isFileAccessErr(path string, err error) bool {
	if err == nil {
		return false
	}

	log.Warnf("unable to access path=%q: %+v", path, err)
	r.errPaths[path] = err
	return true
}

View File

@ -0,0 +1,103 @@
package fileresolver
import (
"github.com/anchore/stereoscope/pkg/file"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"io/fs"
"os"
"path"
"testing"
)
// Test_index verifies that building a file indexer records both the target
// file and its parent directory, and that the mode stored in the index for
// each matches what os.Stat reports.
func Test_index(t *testing.T) {
	testPath := "test-fixtures/system_paths/target/home/place"
	indexer := newFileIndexer(testPath, "", make([]PathIndexVisitor, 0)...)
	tree, index, err := indexer.build()
	require.NoError(t, err)

	tests := []struct {
		name string
		path string
	}{
		{
			name: "has path",
			path: "test-fixtures/system_paths/target/home/place",
		},
		{
			name: "has parent dir",
			path: "test-fixtures/system_paths/target/home",
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// info is dereferenced below, so a stat failure must stop the test.
			info, err := os.Stat(test.path)
			require.NoError(t, err)

			// note: the index uses absolute paths, so assertions MUST keep this in mind
			cwd, err := os.Getwd()
			require.NoError(t, err)

			p := file.Path(path.Join(cwd, test.path))
			assert.Equal(t, true, tree.HasPath(p))

			// Abort only when the lookup fails so that the metadata
			// comparison below always runs on success.
			exists, ref, err := tree.File(p)
			require.NoError(t, err)
			require.True(t, exists)

			entry, err := index.Get(*ref.Reference)
			require.NoError(t, err)
			// NOTE(review): assumes the entry exposes Mode as a method promoted
			// from an embedded fs.FileInfo (as IsDir() is elsewhere) — confirm.
			assert.Equal(t, info.Mode(), entry.Mode())
		})
	}
}
// Test_indexRejectsDirectory verifies that building a file indexer for a
// path that is a directory fails.
func Test_indexRejectsDirectory(t *testing.T) {
	const dirPath = "test-fixtures/system_paths/target/home"

	_, _, err := newFileIndexer(dirPath, "").build()

	require.Error(t, err)
}
// Test_ignoresPathIfFiltered verifies that the build fails when a filter
// function rejects the file being indexed.
func Test_ignoresPathIfFiltered(t *testing.T) {
	const testPath = "test-fixtures/system_paths/target/home/place"

	cwd, err := os.Getwd()
	require.NoError(t, err)

	// the indexer hands absolute paths to the filters
	rejected := path.Join(cwd, testPath)
	rejectFile := func(_, candidate string, _ os.FileInfo, _ error) error {
		if candidate != rejected {
			return nil
		}
		return ErrSkipPath
	}

	_, _, buildErr := newFileIndexer(testPath, "", rejectFile).build()
	require.Error(t, buildErr)
}
// Test_ignoresPathIfParentFiltered verifies that the build fails when a
// filter function rejects the parent directory of the file being indexed.
func Test_ignoresPathIfParentFiltered(t *testing.T) {
	const (
		testPath   = "test-fixtures/system_paths/target/home/place"
		parentPath = "test-fixtures/system_paths/target/home"
	)

	cwd, err := os.Getwd()
	require.NoError(t, err)

	// the indexer hands absolute paths to the filters
	rejected := path.Join(cwd, parentPath)
	rejectParent := func(_, candidate string, _ os.FileInfo, _ error) error {
		if candidate != rejected {
			return nil
		}
		return fs.SkipDir
	}

	_, _, buildErr := newFileIndexer(testPath, "", rejectParent).build()
	require.Error(t, buildErr)
}

View File

@ -0,0 +1,229 @@
package fileresolver
import (
"context"
"errors"
"fmt"
"io"
"os"
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/windows"
)
// filetreeResolver holds the state and the path/content lookups shared by
// resolvers that are backed by an indexed filetree.
type filetreeResolver struct {
// chroot translates between user-facing paths/globs and native ones.
chroot ChrootContext
// tree is the file tree consulted for path existence and file listing.
tree filetree.Reader
// index maps file references to their stored entries (type and metadata).
index filetree.IndexReader
// searchContext serves the path, glob and MIME type searches.
searchContext filetree.Searcher
}
// requestPath converts a user-supplied path into the form used for tree and
// search lookups by delegating to the chroot context's ToNativePath.
func (r *filetreeResolver) requestPath(userPath string) (string, error) {
	nativePath, err := r.chroot.ToNativePath(userPath)
	return nativePath, err
}
// responsePath converts a path from the underlying filesystem domain into a
// path relative to the root of the file resolver, by delegating to the
// chroot context's ToChrootPath.
func (r filetreeResolver) responsePath(path string) string {
	chrootPath := r.chroot.ToChrootPath(path)
	return chrootPath
}
// HasPath reports whether the given path exists in the underlying source.
// A path that cannot be converted to its native form is reported as absent.
func (r *filetreeResolver) HasPath(userPath string) bool {
	nativePath, err := r.requestPath(userPath)
	if err == nil {
		return r.tree.HasPath(stereoscopeFile.Path(nativePath))
	}
	return false
}
// FilesByPath returns a location for every given path that resolves to a
// non-directory entry in the file index.
//
// Basename symlinks are followed, and the path used to reach the file is
// kept as the access path of the returned location. Paths that cannot be
// converted, searched, or looked up in the index are logged and skipped, as
// are paths without a reference and directories; the returned error is
// always nil.
func (r filetreeResolver) FilesByPath(userPaths ...string) ([]file.Location, error) {
	locations := make([]file.Location, 0)

	for _, userPath := range userPaths {
		nativePath, err := r.requestPath(userPath)
		if err != nil {
			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
			continue
		}

		// resolve symlinks while preserving the requested path as the access path
		resolution, err := r.searchContext.SearchByPath(nativePath, filetree.FollowBasenameLinks)
		if err != nil {
			log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
			continue
		}
		if !resolution.HasReference() {
			continue
		}

		entry, err := r.index.Get(*resolution.Reference)
		if err != nil {
			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
			continue
		}

		// directories are never reported
		if entry.Metadata.IsDir() {
			continue
		}

		accessPath := nativePath
		if windows.HostRunningOnWindows() {
			accessPath = windows.ToPosix(nativePath)
		}

		location := file.NewVirtualLocationFromDirectory(
			r.responsePath(string(resolution.RealPath)), // the actual path relative to the resolver root
			r.responsePath(accessPath),                  // the path used to access this file, relative to the resolver root
			*resolution.Reference,
		)
		locations = append(locations, location)
	}

	return locations, nil
}
// requestGlob converts a user-supplied glob pattern into the form used for
// glob searches by delegating to the chroot context's ToNativeGlob.
func (r filetreeResolver) requestGlob(pattern string) (string, error) {
	nativeGlob, err := r.chroot.ToNativeGlob(pattern)
	return nativeGlob, err
}
// FilesByGlob returns a location for every non-directory entry in the
// indexed file tree that matches any of the given glob patterns.
//
// Basename symlinks are followed. Each file reference is reported at most
// once, even when several patterns match it. Unlike FilesByPath, a failure
// to convert a pattern, to run the search, or to look up a matched
// reference aborts the call and returns the error.
func (r filetreeResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
	seen := stereoscopeFile.NewFileReferenceSet()
	locations := make([]file.Location, 0)

	for _, pattern := range patterns {
		nativeGlob, err := r.requestGlob(pattern)
		if err != nil {
			return nil, err
		}

		matches, err := r.searchContext.SearchByGlob(nativeGlob, filetree.FollowBasenameLinks)
		if err != nil {
			return nil, err
		}

		for _, match := range matches {
			if !match.HasReference() {
				continue
			}
			if seen.Contains(*match.Reference) {
				continue
			}

			entry, err := r.index.Get(*match.Reference)
			if err != nil {
				return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", match.Reference.RealPath, err)
			}

			// directories are never reported
			if entry.Metadata.IsDir() {
				continue
			}

			realPath := r.responsePath(string(match.Reference.RealPath)) // the actual path relative to the resolver root
			accessPath := r.responsePath(string(match.RequestPath))      // the path used to access this file, relative to the resolver root

			seen.Add(*match.Reference)
			locations = append(locations, file.NewVirtualLocationFromDirectory(realPath, accessPath, *match.Reference))
		}
	}

	return locations, nil
}
// RelativeFileByPath fetches a single file at the given path. The interface
// describes this as a lookup relative to the layer squash of the given
// location; here the location argument is ignored and the call is a plain
// path lookup via FilesByPath. It returns nil when the lookup fails or
// yields no result, and otherwise a pointer to the first match.
func (r *filetreeResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
	matches, err := r.FilesByPath(path)
	if err != nil || len(matches) == 0 {
		return nil
	}
	return &matches[0]
}
// FileContentsByLocation returns a lazily opened reader for the file the
// given location refers to.
//
// An error is returned when the location has an empty RealPath, when its
// reference is not present in the index, or when the indexed entry is a
// directory. The reader is created from the reference's real path; on
// Windows hosts that posix path is first translated back to its on-disk form.
func (r filetreeResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
	if location.RealPath == "" {
		return nil, errors.New("empty path given")
	}

	ref := location.Reference()

	entry, err := r.index.Get(ref)
	if err != nil {
		return nil, err
	}

	// directories have no contents to read
	if entry.Type == stereoscopeFile.TypeDirectory {
		return nil, fmt.Errorf("cannot read contents of non-file %q", ref.RealPath)
	}

	// RealPath is posix, so on windows translate it to the true on-disk path
	diskPath := string(ref.RealPath)
	if windows.HostRunningOnWindows() {
		diskPath = windows.FromPosix(diskPath)
	}

	return stereoscopeFile.NewLazyReadCloser(diskPath), nil
}
// AllLocations streams a location for every file of every type in the tree
// over the returned unbuffered channel. The channel is closed once all
// locations have been sent or as soon as ctx is done, whichever comes first.
func (r *filetreeResolver) AllLocations(ctx context.Context) <-chan file.Location {
	out := make(chan file.Location)

	go func() {
		defer close(out)

		for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
			location := file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref)

			select {
			case out <- location:
			case <-ctx.Done():
				return
			}
		}
	}()

	return out
}
// FileMetadataByLocation returns the metadata stored in the index for the
// reference of the given location. When the index lookup fails, the
// underlying error is dropped and an error wrapping os.ErrNotExist (and
// describing the location) is returned together with empty metadata.
func (r *filetreeResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
	entry, lookupErr := r.index.Get(location.Reference())
	if lookupErr != nil {
		var none file.Metadata
		return none, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
	}

	return entry.Metadata, nil
}
// FilesByMIMEType returns a location for every indexed file whose MIME type
// matches one of the given types. Each file reference is reported at most
// once and results without a reference are skipped. A search failure is
// returned as-is.
func (r *filetreeResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
	matches, err := r.searchContext.SearchByMIMEType(types...)
	if err != nil {
		return nil, err
	}

	seen := stereoscopeFile.NewFileReferenceSet()
	locations := make([]file.Location, 0)

	for _, match := range matches {
		if !match.HasReference() || seen.Contains(*match.Reference) {
			continue
		}

		realPath := r.responsePath(string(match.Reference.RealPath))
		accessPath := r.responsePath(string(match.RequestPath))

		seen.Add(*match.Reference)
		locations = append(locations, file.NewVirtualLocationFromDirectory(realPath, accessPath, *match.Reference))
	}

	return locations, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,6 @@ package filesource
import ( import (
"crypto" "crypto"
"fmt" "fmt"
"io/fs"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
@ -36,7 +35,7 @@ type fileSource struct {
id artifact.ID id artifact.ID
digestForVersion string digestForVersion string
config Config config Config
resolver *fileresolver.Directory resolver file.Resolver
mutex *sync.Mutex mutex *sync.Mutex
closer func() error closer func() error
digests []file.Digest digests []file.Digest
@ -165,48 +164,22 @@ func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
return nil, err return nil, err
} }
var res *fileresolver.Directory
if isArchiveAnalysis { if isArchiveAnalysis {
// this is an analysis of an archive file... we should scan the directory where the archive contents // this is an analysis of an archive file... we should scan the directory where the archive contents
res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
}
} else {
// this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we
// don't want to include any other files except this the given file.
exclusionFunctions = append([]fileresolver.PathIndexVisitor{
// note: we should exclude these kinds of paths first before considering any other user-provided exclusions
func(_, p string, _ os.FileInfo, _ error) error {
if p == absParentDir {
// this is the root directory... always include it
return nil
}
if filepath.Dir(p) != absParentDir {
// we are no longer in the root directory containing the single file we want to scan...
// we should skip the directory this path resides in entirely!
return fs.SkipDir
}
if filepath.Base(p) != filepath.Base(s.config.Path) {
// we're in the root directory, but this is not the file we want to scan...
// we should selectively skip this file (not the directory we're in).
return fileresolver.ErrSkipPath
}
return nil
},
}, exclusionFunctions...)
res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err) return nil, fmt.Errorf("unable to create directory resolver: %w", err)
} }
s.resolver = res
return s.resolver, nil
} }
// This is analysis of a single file. Use file indexer.
res, err := fileresolver.NewFromFile(absParentDir, s.analysisPath, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create file resolver: %w", err)
}
s.resolver = res s.resolver = res
return s.resolver, nil return s.resolver, nil
} }