mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
Signed-off-by: Joseph Palermo <jpalermo@vmware.com> Signed-off-by: Chris Selzo <cselzo@vmware.com> Co-authored-by: Joseph Palermo <jpalermo@vmware.com>
486 lines
13 KiB
Go
486 lines
13 KiB
Go
package fileresolver
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/wagoodman/go-partybus"
|
|
"github.com/wagoodman/go-progress"
|
|
|
|
"github.com/anchore/stereoscope/pkg/file"
|
|
"github.com/anchore/stereoscope/pkg/filetree"
|
|
"github.com/anchore/syft/internal"
|
|
"github.com/anchore/syft/internal/bus"
|
|
"github.com/anchore/syft/internal/log"
|
|
"github.com/anchore/syft/syft/event"
|
|
"github.com/anchore/syft/syft/internal/windows"
|
|
)
|
|
|
|
// PathIndexVisitor is a filter consulted for each path before it is added to the index.
// It receives the path, the os.FileInfo for that path (which may be nil), and any error
// raised while reaching the path. A non-nil return value keeps the path out of the index;
// a return value matching fs.SkipDir is additionally handed back to the caller of indexPath.
type PathIndexVisitor func(string, os.FileInfo, error) error
|
|
|
|
// directoryIndexer builds a file tree and a metadata index for a location on disk.
type directoryIndexer struct {
	// path is the root location that build() passes to indexAllRoots.
	path string
	// base is the directory symlink targets are re-rooted under when it is non-empty.
	base string
	// pathIndexVisitors is the ordered filter chain evaluated by indexPath for every path.
	pathIndexVisitors []PathIndexVisitor
	// errPaths records, per path, the error that prevented the path from being indexed.
	errPaths map[string]error
	// tree receives a node for every directory, file, and symlink that is indexed.
	tree filetree.ReadWriter
	// index stores the metadata associated with each reference added to tree.
	index filetree.Index
}
|
|
|
|
func newDirectoryIndexer(path, base string, visitors ...PathIndexVisitor) *directoryIndexer {
|
|
i := &directoryIndexer{
|
|
path: path,
|
|
base: base,
|
|
tree: filetree.New(),
|
|
index: filetree.NewIndex(),
|
|
pathIndexVisitors: append([]PathIndexVisitor{requireFileInfo, disallowByFileType, disallowUnixSystemRuntimePath}, visitors...),
|
|
errPaths: make(map[string]error),
|
|
}
|
|
|
|
// these additional stateful visitors should be the first thing considered when walking / indexing
|
|
i.pathIndexVisitors = append(
|
|
[]PathIndexVisitor{
|
|
i.disallowRevisitingVisitor,
|
|
i.disallowFileAccessErr,
|
|
},
|
|
i.pathIndexVisitors...,
|
|
)
|
|
|
|
return i
|
|
}
|
|
|
|
func (r *directoryIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
|
|
return r.tree, r.index, indexAllRoots(r.path, r.indexTree)
|
|
}
|
|
|
|
func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error {
|
|
// why account for multiple roots? To cover cases when there is a symlink that references above the root path,
|
|
// in which case we need to additionally index where the link resolves to. it's for this reason why the filetree
|
|
// must be relative to the root of the filesystem (and not just relative to the given path).
|
|
pathsToIndex := []string{root}
|
|
fullPathsMap := map[string]struct{}{}
|
|
|
|
stager, prog := indexingProgress(root)
|
|
defer prog.SetCompleted()
|
|
loop:
|
|
for {
|
|
var currentPath string
|
|
switch len(pathsToIndex) {
|
|
case 0:
|
|
break loop
|
|
case 1:
|
|
currentPath, pathsToIndex = pathsToIndex[0], nil
|
|
default:
|
|
currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:]
|
|
}
|
|
|
|
additionalRoots, err := indexer(currentPath, stager)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err)
|
|
}
|
|
|
|
for _, newRoot := range additionalRoots {
|
|
if _, ok := fullPathsMap[newRoot]; !ok {
|
|
fullPathsMap[newRoot] = struct{}{}
|
|
pathsToIndex = append(pathsToIndex, newRoot)
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r *directoryIndexer) indexTree(root string, stager *progress.Stage) ([]string, error) {
|
|
log.WithFields("path", root).Trace("indexing filetree")
|
|
|
|
var roots []string
|
|
var err error
|
|
|
|
root, err = filepath.Abs(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// we want to be able to index single files with the directory resolver. However, we should also allow for attempting
|
|
// to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing).
|
|
// For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root,
|
|
// but continue forth with index regardless if the given root path exists or not.
|
|
fi, err := os.Stat(root)
|
|
if err != nil && fi != nil && !fi.IsDir() {
|
|
// note: we want to index the path regardless of an error stat-ing the path
|
|
newRoot, _ := r.indexPath(root, fi, nil)
|
|
if newRoot != "" {
|
|
roots = append(roots, newRoot)
|
|
}
|
|
return roots, nil
|
|
}
|
|
|
|
shouldIndexFullTree, err := isRealPath(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if !shouldIndexFullTree {
|
|
newRoots, err := r.indexBranch(root, stager)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to index branch=%q: %w", root, err)
|
|
}
|
|
|
|
roots = append(roots, newRoots...)
|
|
|
|
return roots, nil
|
|
}
|
|
|
|
err = filepath.Walk(root,
|
|
func(path string, info os.FileInfo, err error) error {
|
|
stager.Current = path
|
|
|
|
newRoot, err := r.indexPath(path, info, err)
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if newRoot != "" {
|
|
roots = append(roots, newRoot)
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to index root=%q: %w", root, err)
|
|
}
|
|
|
|
return roots, nil
|
|
}
|
|
|
|
// isRealPath reports whether the parent directory of root is free of symlinks, that is,
// whether resolving all symlinks in the parent yields the same cleaned path. Only the
// parent is examined; root itself may be a symlink or may not exist. An error is returned
// when the parent cannot be resolved.
func isRealPath(root string) (bool, error) {
	parentDir := filepath.Clean(filepath.Dir(root))

	resolvedParent, err := filepath.EvalSymlinks(parentDir)
	if err != nil {
		return false, err
	}

	if filepath.Clean(resolvedParent) != parentDir {
		return false, nil
	}
	return true, nil
}
|
|
|
|
func (r *directoryIndexer) indexBranch(root string, stager *progress.Stage) ([]string, error) {
|
|
rootRealPath, err := filepath.EvalSymlinks(root)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// there is a symlink within the path to the root, we need to index the real root parent first
|
|
// then capture the symlinks to the root path
|
|
roots, err := r.indexTree(rootRealPath, stager)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to index real root=%q: %w", rootRealPath, err)
|
|
}
|
|
|
|
// walk down all ancestor paths and shallow-add non-existing elements to the tree
|
|
for idx, p := range allContainedPaths(root) {
|
|
var targetPath string
|
|
if idx != 0 {
|
|
parent := path.Dir(p)
|
|
cleanParent, err := filepath.EvalSymlinks(parent)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to evaluate symlink for contained path parent=%q: %w", parent, err)
|
|
}
|
|
targetPath = filepath.Join(cleanParent, filepath.Base(p))
|
|
} else {
|
|
targetPath = p
|
|
}
|
|
|
|
stager.Current = targetPath
|
|
|
|
lstat, err := os.Lstat(targetPath)
|
|
newRoot, err := r.indexPath(targetPath, lstat, err)
|
|
if err != nil && !errors.Is(err, ErrSkipPath) && !errors.Is(err, fs.SkipDir) {
|
|
return nil, fmt.Errorf("unable to index ancestor path=%q: %w", targetPath, err)
|
|
}
|
|
if newRoot != "" {
|
|
roots = append(roots, newRoot)
|
|
}
|
|
}
|
|
|
|
return roots, nil
|
|
}
|
|
|
|
func allContainedPaths(p string) []string {
|
|
var all []string
|
|
var currentPath string
|
|
|
|
cleanPath := strings.TrimSpace(p)
|
|
|
|
if cleanPath == "" {
|
|
return nil
|
|
}
|
|
|
|
// iterate through all parts of the path, replacing path elements with link resolutions where possible.
|
|
for idx, part := range strings.Split(filepath.Clean(cleanPath), file.DirSeparator) {
|
|
if idx == 0 && part == "" {
|
|
currentPath = file.DirSeparator
|
|
continue
|
|
}
|
|
|
|
// cumulatively gather where we are currently at and provide a rich object
|
|
currentPath = path.Join(currentPath, part)
|
|
all = append(all, currentPath)
|
|
}
|
|
return all
|
|
}
|
|
|
|
func (r *directoryIndexer) indexPath(path string, info os.FileInfo, err error) (string, error) {
|
|
// ignore any path which a filter function returns true
|
|
for _, filterFn := range r.pathIndexVisitors {
|
|
if filterFn == nil {
|
|
continue
|
|
}
|
|
|
|
if filterErr := filterFn(path, info, err); filterErr != nil {
|
|
if errors.Is(filterErr, fs.SkipDir) {
|
|
// signal to walk() to skip this directory entirely (even if we're processing a file)
|
|
return "", filterErr
|
|
}
|
|
// skip this path but don't affect walk() trajectory
|
|
return "", nil
|
|
}
|
|
}
|
|
|
|
if info == nil {
|
|
// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
|
|
r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
|
|
return "", nil
|
|
}
|
|
|
|
// here we check to see if we need to normalize paths to posix on the way in coming from windows
|
|
if windows.HostRunningOnWindows() {
|
|
path = windows.ToPosix(path)
|
|
}
|
|
|
|
newRoot, err := r.addPathToIndex(path, info)
|
|
if r.isFileAccessErr(path, err) {
|
|
return "", nil
|
|
}
|
|
|
|
return newRoot, nil
|
|
}
|
|
|
|
func (r *directoryIndexer) disallowFileAccessErr(path string, _ os.FileInfo, err error) error {
|
|
if r.isFileAccessErr(path, err) {
|
|
return ErrSkipPath
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *directoryIndexer) isFileAccessErr(path string, err error) bool {
|
|
// don't allow for errors to stop indexing, keep track of the paths and continue.
|
|
if err != nil {
|
|
log.Warnf("unable to access path=%q: %+v", path, err)
|
|
r.errPaths[path] = err
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (r directoryIndexer) addPathToIndex(p string, info os.FileInfo) (string, error) {
|
|
switch t := file.TypeFromMode(info.Mode()); t {
|
|
case file.TypeSymLink:
|
|
return r.addSymlinkToIndex(p, info)
|
|
case file.TypeDirectory:
|
|
return "", r.addDirectoryToIndex(p, info)
|
|
case file.TypeRegular:
|
|
return "", r.addFileToIndex(p, info)
|
|
default:
|
|
return "", fmt.Errorf("unsupported file type: %s", t)
|
|
}
|
|
}
|
|
|
|
func (r directoryIndexer) addDirectoryToIndex(p string, info os.FileInfo) error {
|
|
ref, err := r.tree.AddDir(file.Path(p))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
metadata := file.NewMetadataFromPath(p, info)
|
|
r.index.Add(*ref, metadata)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r directoryIndexer) addFileToIndex(p string, info os.FileInfo) error {
|
|
ref, err := r.tree.AddFile(file.Path(p))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
metadata := file.NewMetadataFromPath(p, info)
|
|
r.index.Add(*ref, metadata)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r directoryIndexer) addSymlinkToIndex(p string, info os.FileInfo) (string, error) {
|
|
linkTarget, err := os.Readlink(p)
|
|
if err != nil {
|
|
isOnWindows := windows.HostRunningOnWindows()
|
|
if isOnWindows {
|
|
p = windows.FromPosix(p)
|
|
}
|
|
|
|
linkTarget, err = filepath.EvalSymlinks(p)
|
|
|
|
if isOnWindows {
|
|
p = windows.ToPosix(p)
|
|
}
|
|
|
|
if err != nil {
|
|
return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
|
|
}
|
|
}
|
|
|
|
if filepath.IsAbs(linkTarget) {
|
|
linkTarget = filepath.Clean(linkTarget)
|
|
// if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to
|
|
// resolve relative to the root of the base directory, if it is not already
|
|
// prefixed with a volume name
|
|
if filepath.VolumeName(linkTarget) == "" {
|
|
linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget))
|
|
}
|
|
} else {
|
|
// if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to
|
|
// resolve it relative to the directory in question (e.g. resolve to
|
|
// /dev/fd/2)
|
|
if r.base == "" {
|
|
linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
|
|
} else {
|
|
// if the base is set, then we first need to resolve the link,
|
|
// before finding it's location in the base
|
|
dir, err := filepath.Rel(r.base, filepath.Dir(p))
|
|
if err != nil {
|
|
return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err)
|
|
}
|
|
linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget)))
|
|
}
|
|
}
|
|
|
|
ref, err := r.tree.AddSymLink(file.Path(p), file.Path(linkTarget))
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
targetAbsPath := linkTarget
|
|
if !filepath.IsAbs(targetAbsPath) {
|
|
targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget))
|
|
}
|
|
|
|
metadata := file.NewMetadataFromPath(p, info)
|
|
metadata.LinkDestination = linkTarget
|
|
r.index.Add(*ref, metadata)
|
|
|
|
// if the target path does not exist, then do not report it as a new root, or try to send
|
|
// syft parsing there.
|
|
if _, err := os.Stat(targetAbsPath); err != nil && errors.Is(err, os.ErrNotExist) {
|
|
log.Debugf("link %s points to unresolved path %s, ignoring target as new root", p, targetAbsPath)
|
|
targetAbsPath = ""
|
|
}
|
|
|
|
return targetAbsPath, nil
|
|
}
|
|
|
|
func (r directoryIndexer) hasBeenIndexed(p string) (bool, *file.Metadata) {
|
|
filePath := file.Path(p)
|
|
if !r.tree.HasPath(filePath) {
|
|
return false, nil
|
|
}
|
|
|
|
exists, ref, err := r.tree.File(filePath)
|
|
if err != nil || !exists || !ref.HasReference() {
|
|
return false, nil
|
|
}
|
|
|
|
// cases like "/" will be in the tree, but not been indexed yet (a special case). We want to capture
|
|
// these cases as new paths to index.
|
|
if !ref.HasReference() {
|
|
return false, nil
|
|
}
|
|
|
|
entry, err := r.index.Get(*ref.Reference)
|
|
if err != nil {
|
|
return false, nil
|
|
}
|
|
|
|
return true, &entry.Metadata
|
|
}
|
|
|
|
func (r *directoryIndexer) disallowRevisitingVisitor(path string, _ os.FileInfo, _ error) error {
|
|
// this prevents visiting:
|
|
// - link destinations twice, once for the real file and another through the virtual path
|
|
// - infinite link cycles
|
|
if indexed, metadata := r.hasBeenIndexed(path); indexed {
|
|
if metadata.IsDir() {
|
|
// signal to walk() that we should skip this directory entirely
|
|
return fs.SkipDir
|
|
}
|
|
return ErrSkipPath
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func disallowUnixSystemRuntimePath(path string, _ os.FileInfo, _ error) error {
|
|
if internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) {
|
|
return fs.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func disallowByFileType(_ string, info os.FileInfo, _ error) error {
|
|
if info == nil {
|
|
// we can't filter out by filetype for non-existent files
|
|
return nil
|
|
}
|
|
switch file.TypeFromMode(info.Mode()) {
|
|
case file.TypeCharacterDevice, file.TypeSocket, file.TypeBlockDevice, file.TypeFIFO, file.TypeIrregular:
|
|
return ErrSkipPath
|
|
// note: symlinks that point to these files may still get by.
|
|
// We handle this later in processing to help prevent against infinite links traversal.
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func requireFileInfo(_ string, info os.FileInfo, _ error) error {
|
|
if info == nil {
|
|
return ErrSkipPath
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func indexingProgress(path string) (*progress.Stage, *progress.Manual) {
|
|
stage := &progress.Stage{}
|
|
prog := progress.NewManual(-1)
|
|
|
|
bus.Publish(partybus.Event{
|
|
Type: event.FileIndexingStarted,
|
|
Source: path,
|
|
Value: struct {
|
|
progress.Stager
|
|
progress.Progressable
|
|
}{
|
|
Stager: progress.Stager(stage),
|
|
Progressable: prog,
|
|
},
|
|
})
|
|
|
|
return stage, prog
|
|
}
|