Allow scanning unpacked container filesystems (#1485)

* source: avoid second-step of symlink resolution in directory resolver

We can use the already existing file tree to peform symlink resolution
for FilesByPath, instead of traversing the symlinks again.

This moves all of the symlink logic into the indexing code, and then we
can rely on syft's resolution algorithm over the index in this part of
the codebase.

Signed-off-by: Justin Chadwell <me@jedevc.com>

* source: add base parameter to directory resolver

The new base parameter is an optional parameter for the directory
resolver that resolves all symlinks relative to this root. There are two
intended use cases:

- base = "/". The previous behavior, symlinks are resolved relative to
the root filesystem.
- base = path. Symlinks are resolved relative to the target filesystem,
allowing correct behavior when scanning unpacked container filesystems
on disk.

Signed-off-by: Justin Chadwell <me@jedevc.com>

* source: add tests for new base parameter

Signed-off-by: Justin Chadwell <me@jedevc.com>

---------

Signed-off-by: Justin Chadwell <me@jedevc.com>
This commit is contained in:
Justin Chadwell 2023-01-30 18:47:24 +00:00 committed by GitHub
parent ba55963104
commit b81c9805dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 169 additions and 38 deletions

View File

@ -37,6 +37,7 @@ type pathFilterFn func(string, os.FileInfo) bool
// directoryResolver implements path and content access for the directory data source. // directoryResolver implements path and content access for the directory data source.
type directoryResolver struct { type directoryResolver struct {
path string path string
base string
currentWdRelativeToRoot string currentWdRelativeToRoot string
currentWd string currentWd string
fileTree *filetree.FileTree fileTree *filetree.FileTree
@ -47,7 +48,7 @@ type directoryResolver struct {
errPaths map[string]error errPaths map[string]error
} }
func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) { func newDirectoryResolver(root string, base string, pathFilters ...pathFilterFn) (*directoryResolver, error) {
currentWD, err := os.Getwd() currentWD, err := os.Getwd()
if err != nil { if err != nil {
return nil, fmt.Errorf("could not get CWD: %w", err) return nil, fmt.Errorf("could not get CWD: %w", err)
@ -64,6 +65,18 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR
return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err) return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err)
} }
cleanBase := ""
if base != "" {
cleanBase, err = filepath.EvalSymlinks(base)
if err != nil {
return nil, fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err)
}
cleanBase, err = filepath.Abs(cleanBase)
if err != nil {
return nil, err
}
}
var currentWdRelRoot string var currentWdRelRoot string
if path.IsAbs(cleanRoot) { if path.IsAbs(cleanRoot) {
currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot) currentWdRelRoot, err = filepath.Rel(cleanCWD, cleanRoot)
@ -76,6 +89,7 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR
resolver := directoryResolver{ resolver := directoryResolver{
path: cleanRoot, path: cleanRoot,
base: cleanBase,
currentWd: cleanCWD, currentWd: cleanCWD,
currentWdRelativeToRoot: currentWdRelRoot, currentWdRelativeToRoot: currentWdRelRoot,
fileTree: filetree.NewFileTree(), fileTree: filetree.NewFileTree(),
@ -244,10 +258,25 @@ func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string
return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
} }
// note: if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to resolve it relative to the directory if filepath.IsAbs(linkTarget) {
// in question (e.g. resolve to /dev/fd/2) // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to
if !filepath.IsAbs(linkTarget) { // resolve relative to the root of the base directory
linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget))
} else {
// if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to
// resolve it relative to the directory in question (e.g. resolve to
// /dev/fd/2)
if r.base == "" {
linkTarget = filepath.Join(filepath.Dir(p), linkTarget) linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
} else {
// if the base is set, then we first need to resolve the link,
// before finding it's location in the base
dir, err := filepath.Rel(r.base, filepath.Dir(p))
if err != nil {
return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err)
}
linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget)))
}
} }
ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget))
@ -336,14 +365,17 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
} }
// we should be resolving symlinks and preserving this information as a VirtualPath to the real file // we should be resolving symlinks and preserving this information as a VirtualPath to the real file
evaluatedPath, err := filepath.EvalSymlinks(userStrPath) exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks)
if err != nil { if err != nil {
log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err) log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
continue continue
} }
if !exists {
continue
}
// TODO: why not use stored metadata? // TODO: why not use stored metadata?
fileMeta, err := os.Stat(evaluatedPath) fileMeta, err := os.Stat(string(ref.RealPath))
if errors.Is(err, os.ErrNotExist) { if errors.Is(err, os.ErrNotExist) {
// note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform
// specific, but essentially hints at the same overall problem (that the path does not exist). Such an // specific, but essentially hints at the same overall problem (that the path does not exist). Such an
@ -354,7 +386,7 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
// invalid paths. This logging statement is meant to raise IO or permissions related problems. // invalid paths. This logging statement is meant to raise IO or permissions related problems.
var pathErr *os.PathError var pathErr *os.PathError
if !errors.As(err, &pathErr) { if !errors.As(err, &pathErr) {
log.Warnf("path is not valid (%s): %+v", evaluatedPath, err) log.Warnf("path is not valid (%s): %+v", ref.RealPath, err)
} }
continue continue
} }
@ -368,8 +400,6 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
userStrPath = windowsToPosix(userStrPath) userStrPath = windowsToPosix(userStrPath)
} }
exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks)
if err == nil && exists {
loc := NewVirtualLocationFromDirectory( loc := NewVirtualLocationFromDirectory(
r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root
@ -377,7 +407,6 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
) )
references = append(references, loc) references = append(references, loc)
} }
}
return references, nil return references, nil
} }

View File

@ -57,7 +57,7 @@ func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
resolver, err := newDirectoryResolver(c.relativeRoot) resolver, err := newDirectoryResolver(c.relativeRoot, "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByPath(c.input) refs, err := resolver.FilesByPath(c.input)
@ -111,7 +111,7 @@ func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) {
absRoot, err := filepath.Abs(c.relativeRoot) absRoot, err := filepath.Abs(c.relativeRoot)
require.NoError(t, err) require.NoError(t, err)
resolver, err := newDirectoryResolver(absRoot) resolver, err := newDirectoryResolver(absRoot, "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByPath(c.input) refs, err := resolver.FilesByPath(c.input)
@ -172,7 +172,7 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
resolver, err := newDirectoryResolver(c.root) resolver, err := newDirectoryResolver(c.root, "")
assert.NoError(t, err) assert.NoError(t, err)
hasPath := resolver.HasPath(c.input) hasPath := resolver.HasPath(c.input)
@ -220,7 +220,7 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures") resolver, err := newDirectoryResolver("./test-fixtures", "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByPath(c.input...) refs, err := resolver.FilesByPath(c.input...)
assert.NoError(t, err) assert.NoError(t, err)
@ -233,7 +233,7 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) {
} }
func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures") resolver, err := newDirectoryResolver("./test-fixtures", "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByGlob("**/image-symlinks/file*") refs, err := resolver.FilesByGlob("**/image-symlinks/file*")
assert.NoError(t, err) assert.NoError(t, err)
@ -242,7 +242,7 @@ func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) {
} }
func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks") resolver, err := newDirectoryResolver("./test-fixtures/image-symlinks", "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByGlob("**/*.txt") refs, err := resolver.FilesByGlob("**/*.txt")
assert.NoError(t, err) assert.NoError(t, err)
@ -250,7 +250,7 @@ func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) {
} }
func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures") resolver, err := newDirectoryResolver("./test-fixtures", "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt") refs, err := resolver.FilesByGlob("**/image-symlinks/*1.txt")
assert.NoError(t, err) assert.NoError(t, err)
@ -277,7 +277,7 @@ func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "")
assert.NoError(t, err) assert.NoError(t, err)
refs, err := resolver.FilesByPath(test.fixture) refs, err := resolver.FilesByPath(test.fixture)
@ -300,7 +300,7 @@ func TestDirectoryResolver_FilesByPath_ResolvesSymlinks(t *testing.T) {
func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) {
// let's make certain that "dev/place" is not ignored, since it is not "/dev/place" // let's make certain that "dev/place" is not ignored, since it is not "/dev/place"
resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", "")
assert.NoError(t, err) assert.NoError(t, err)
// ensure the correct filter function is wired up by default // ensure the correct filter function is wired up by default
expectedFn := reflect.ValueOf(isUnallowableFileType) expectedFn := reflect.ValueOf(isUnallowableFileType)
@ -431,7 +431,7 @@ func Test_isUnallowableFileType(t *testing.T) {
func Test_directoryResolver_index(t *testing.T) { func Test_directoryResolver_index(t *testing.T) {
// note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex
r, err := newDirectoryResolver("test-fixtures/system_paths/target") r, err := newDirectoryResolver("test-fixtures/system_paths/target", "")
if err != nil { if err != nil {
t.Fatalf("unable to get indexed dir resolver: %+v", err) t.Fatalf("unable to get indexed dir resolver: %+v", err)
} }
@ -608,7 +608,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
t.Run(test.fixturePath, func(t *testing.T) { t.Run(test.fixturePath, func(t *testing.T) {
resolver, err := newDirectoryResolver(test.fixturePath) resolver, err := newDirectoryResolver(test.fixturePath, "")
assert.NoError(t, err) assert.NoError(t, err)
locations, err := resolver.FilesByMIMEType(test.mimeType) locations, err := resolver.FilesByMIMEType(test.mimeType)
assert.NoError(t, err) assert.NoError(t, err)
@ -621,7 +621,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) {
} }
func Test_IndexingNestedSymLinks(t *testing.T) { func Test_IndexingNestedSymLinks(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "")
require.NoError(t, err) require.NoError(t, err)
// check that we can get the real path // check that we can get the real path
@ -674,7 +674,7 @@ func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) {
return strings.HasSuffix(path, string(filepath.Separator)+"readme") return strings.HasSuffix(path, string(filepath.Separator)+"readme")
} }
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn) resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "", filterFn)
require.NoError(t, err) require.NoError(t, err)
// the path to the real file is PRUNED from the index, so we should NOT expect a location returned // the path to the real file is PRUNED from the index, so we should NOT expect a location returned
@ -694,7 +694,7 @@ func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) {
} }
func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root") resolver, err := newDirectoryResolver("./test-fixtures/symlinks-multiple-roots/root", "")
require.NoError(t, err) require.NoError(t, err)
// check that we can get the real path // check that we can get the real path
@ -712,7 +712,7 @@ func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) {
} }
func Test_RootViaSymlink(t *testing.T) { func Test_RootViaSymlink(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root") resolver, err := newDirectoryResolver("./test-fixtures/symlinked-root/nested/link-root", "")
require.NoError(t, err) require.NoError(t, err)
locations, err := resolver.FilesByPath("./file1.txt") locations, err := resolver.FilesByPath("./file1.txt")
@ -753,7 +753,7 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) {
} }
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
r, err := newDirectoryResolver(".") r, err := newDirectoryResolver(".", "")
require.NoError(t, err) require.NoError(t, err)
actual, err := r.FileContentsByLocation(test.location) actual, err := r.FileContentsByLocation(test.location)
@ -819,7 +819,7 @@ func Test_isUnixSystemRuntimePath(t *testing.T) {
func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) {
test := func(t *testing.T) { test := func(t *testing.T) {
resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop") resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop", "")
require.NoError(t, err) require.NoError(t, err)
locations, err := resolver.FilesByGlob("**/file.target") locations, err := resolver.FilesByGlob("**/file.target")
@ -853,7 +853,7 @@ func Test_IncludeRootPathInIndex(t *testing.T) {
return path != "/" return path != "/"
} }
resolver, err := newDirectoryResolver("/", filterFn) resolver, err := newDirectoryResolver("/", "", filterFn)
require.NoError(t, err) require.NoError(t, err)
exists, ref, err := resolver.fileTree.File(file.Path("/")) exists, ref, err := resolver.fileTree.File(file.Path("/"))
@ -870,7 +870,7 @@ func TestDirectoryResolver_indexPath(t *testing.T) {
tempFile, err := os.CreateTemp("", "") tempFile, err := os.CreateTemp("", "")
require.NoError(t, err) require.NoError(t, err)
resolver, err := newDirectoryResolver(tempFile.Name()) resolver, err := newDirectoryResolver(tempFile.Name(), "")
require.NoError(t, err) require.NoError(t, err)
t.Run("filtering path with nil os.FileInfo", func(t *testing.T) { t.Run("filtering path with nil os.FileInfo", func(t *testing.T) {
@ -885,3 +885,76 @@ func TestDirectoryResolver_indexPath(t *testing.T) {
}) })
}) })
} }
func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) {
cases := []struct {
name string
root string
input string
expected []string
}{
{
name: "should find the base file",
root: "./test-fixtures/symlinks-base/",
input: "./base",
expected: []string{
"base",
},
},
{
name: "should follow a link with a pivoted root",
root: "./test-fixtures/symlinks-base/",
input: "./foo",
expected: []string{
"base",
},
},
{
name: "should follow a relative link with extra parents",
root: "./test-fixtures/symlinks-base/",
input: "./bar",
expected: []string{
"base",
},
},
{
name: "should follow an absolute link with extra parents",
root: "./test-fixtures/symlinks-base/",
input: "./baz",
expected: []string{
"base",
},
},
{
name: "should follow an absolute link with extra parents",
root: "./test-fixtures/symlinks-base/",
input: "./sub/link",
expected: []string{
"sub/item",
},
},
{
name: "should follow chained pivoted link",
root: "./test-fixtures/symlinks-base/",
input: "./chain",
expected: []string{
"base",
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
resolver, err := newDirectoryResolver(c.root, c.root)
assert.NoError(t, err)
refs, err := resolver.FilesByPath(c.input)
require.NoError(t, err)
assert.Len(t, refs, len(c.expected))
s := strset.New()
for _, actual := range refs {
s.Add(actual.RealPath)
}
assert.ElementsMatch(t, c.expected, s.List())
})
}
}

View File

@ -6,5 +6,6 @@ type Metadata struct {
Scheme Scheme // the source data scheme type (directory or image) Scheme Scheme // the source data scheme type (directory or image)
ImageMetadata ImageMetadata // all image info (image only) ImageMetadata ImageMetadata // all image info (image only)
Path string // the root path to be cataloged (directory only) Path string // the root path to be cataloged (directory only)
Base string // the base path to be cataloged (directory only)
Name string Name string
} }

View File

@ -32,6 +32,7 @@ type Source struct {
Metadata Metadata Metadata Metadata
directoryResolver *directoryResolver `hash:"ignore"` directoryResolver *directoryResolver `hash:"ignore"`
path string path string
base string
mutex *sync.Mutex mutex *sync.Mutex
Exclusions []string `hash:"ignore"` Exclusions []string `hash:"ignore"`
} }
@ -252,6 +253,11 @@ func NewFromDirectory(path string) (Source, error) {
return NewFromDirectoryWithName(path, "") return NewFromDirectoryWithName(path, "")
} }
// NewFromDirectory creates a new source object tailored to catalog a given filesystem directory recursively.
func NewFromDirectoryRoot(path string) (Source, error) {
return NewFromDirectoryRootWithName(path, "")
}
// NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. // NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name.
func NewFromDirectoryWithName(path string, name string) (Source, error) { func NewFromDirectoryWithName(path string, name string) (Source, error) {
s := Source{ s := Source{
@ -267,6 +273,23 @@ func NewFromDirectoryWithName(path string, name string) (Source, error) {
return s, nil return s, nil
} }
// NewFromDirectoryRootWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name.
func NewFromDirectoryRootWithName(path string, name string) (Source, error) {
s := Source{
mutex: &sync.Mutex{},
Metadata: Metadata{
Name: name,
Scheme: DirectoryScheme,
Path: path,
Base: path,
},
path: path,
base: path,
}
s.SetID()
return s, nil
}
// NewFromFile creates a new source object tailored to catalog a file. // NewFromFile creates a new source object tailored to catalog a file.
func NewFromFile(path string) (Source, func()) { func NewFromFile(path string) (Source, func()) {
return NewFromFileWithName(path, "") return NewFromFileWithName(path, "")
@ -428,7 +451,7 @@ func (s *Source) FileResolver(scope Scope) (FileResolver, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
resolver, err := newDirectoryResolver(s.path, exclusionFunctions...) resolver, err := newDirectoryResolver(s.path, s.base, exclusionFunctions...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err) return nil, fmt.Errorf("unable to create directory resolver: %w", err)
} }

View File

@ -121,7 +121,7 @@ func TestSetID(t *testing.T) {
Path: "test-fixtures/image-simple", Path: "test-fixtures/image-simple",
}, },
}, },
expected: artifact.ID("14b60020c4f9955"), expected: artifact.ID("1b0dc351e6577b01"),
}, },
} }

View File

@ -0,0 +1 @@
../../base

View File

@ -0,0 +1 @@
/../../base

View File

@ -0,0 +1 @@
/foo

View File

@ -0,0 +1 @@
/base

View File

@ -0,0 +1 @@
../sub/item