diff --git a/syft/file/classification_cataloger_test.go b/syft/file/classification_cataloger_test.go index f8fd60336..365dafd0f 100644 --- a/syft/file/classification_cataloger_test.go +++ b/syft/file/classification_cataloger_test.go @@ -18,7 +18,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-libpython3.7.so", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/libpython3.7.so", + location: "libpython3.7.so", expected: []Classification{ { Class: "python-binary", @@ -32,7 +32,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-python3.6", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/python3.6", + location: "python3.6", expected: []Classification{ { Class: "python-binary", @@ -46,7 +46,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-patchlevel.h", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/patchlevel.h", + location: "patchlevel.h", expected: []Classification{ { Class: "cpython-source", @@ -60,7 +60,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-go", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/go", + location: "go", expected: []Classification{ { Class: "go-binary", @@ -74,7 +74,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-go-hint", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/VERSION", + location: "VERSION", expected: []Classification{ { Class: "go-binary-hint", @@ -88,7 +88,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { { name: "positive-busybox", fixtureDir: "test-fixtures/classifiers/positive", - location: "test-fixtures/classifiers/positive/busybox", + location: "busybox", expected: []Classification{ { Class: "busybox-binary", diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index ce3d4c7ba..d0008990a 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -103,6 +103,10 @@ func (c *Catalog) Enumerate(types ...Type) <-chan Package { channel := make(chan Package) go func() { defer close(channel) + if c == nil { + // we should allow enumerating from a catalog that was never created (which will result in no packages enumerated) + return + } for ty, ids := range c.idsByType { if len(types) != 0 { found := false diff --git a/syft/pkg/catalog_test.go b/syft/pkg/catalog_test.go index a0ea66818..5700bf813 100644 --- a/syft/pkg/catalog_test.go +++ b/syft/pkg/catalog_test.go @@ -3,6 +3,8 @@ package pkg import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/scylladb/go-set/strset" "github.com/anchore/syft/syft/source" @@ -144,3 +146,8 @@ func TestCatalog_PathIndexDeduplicatesRealVsVirtualPaths(t *testing.T) { } } + +func TestCatalog_EnumerateNilCatalog(t *testing.T) { + var c *Catalog + assert.Empty(t, c.Enumerate()) +} diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index fb250886a..5093fa965 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -51,6 +51,7 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers var errs error for _, theCataloger := range catalogers { // find packages from the underlying raw data + log.Debugf("cataloging with %q", theCataloger.Name()) packages, relationships, err := theCataloger.Catalog(resolver) if err != nil { errs = multierror.Append(errs, err) @@ -59,7 +60,7 @@ func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers catalogedPackages := len(packages) - log.Debugf("package cataloger %q discovered %d packages", theCataloger.Name(), catalogedPackages) + log.Debugf("discovered %d packages", catalogedPackages) packagesDiscovered.N += int64(catalogedPackages) for _, p := range packages { diff --git a/syft/pkg/cataloger/javascript/parse_package_json.go b/syft/pkg/cataloger/javascript/parse_package_json.go index ad570a740..eb3fdd3dc 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json.go +++ b/syft/pkg/cataloger/javascript/parse_package_json.go @@ -208,7 +208,14 @@ func (p PackageJSON) hasNameAndVersionValues() bool { return p.Name != "" && p.Version != "" } -func pathContainsNodeModulesDirectory(path string) bool { - isNodeModulesPath, _ := regexp.MatchString("[\\/]node_modules[\\/]", path) - return isNodeModulesPath +// this supports both windows and unix paths +var filepathSeparator = regexp.MustCompile(`[\\/]`) + +func pathContainsNodeModulesDirectory(p string) bool { + for _, subPath := range filepathSeparator.Split(p, -1) { + if subPath == "node_modules" { + return true + } + } + return false } diff --git a/syft/pkg/cataloger/javascript/parse_package_json_test.go b/syft/pkg/cataloger/javascript/parse_package_json_test.go index cce594cb1..c162e97ed 100644 --- a/syft/pkg/cataloger/javascript/parse_package_json_test.go +++ b/syft/pkg/cataloger/javascript/parse_package_json_test.go @@ -6,6 +6,7 @@ import ( "github.com/anchore/syft/syft/pkg" "github.com/go-test/deep" + "github.com/stretchr/testify/assert" ) func TestParsePackageJSON(t *testing.T) { @@ -160,3 +161,50 @@ func TestParsePackageJSON_Partial(t *testing.T) { // see https://github.com/anch t.Errorf("no packages should've been returned (but got %d packages)", actualCount) } } + +func Test_pathContainsNodeModulesDirectory(t *testing.T) { + tests := []struct { + path string + expected bool + }{ + // positive + { + path: "something/node_modules/package", + expected: true, + }, + { + path: "node_modules/package", + expected: true, + }, + { + path: "something/node_modules", + expected: true, + }, + { + path: "\\something\\node_modules\\", + expected: true, + }, + { + path: "\\something\\node_modules", + expected: true, + }, + // negative + { + path: "something/node_bogus_modules", + expected: false, + }, + { + path: "something/node_modules_bogus", + expected: false, + }, + { + path: "something/node_bogus_modules/package", + expected: false, + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + assert.Equal(t, test.expected, pathContainsNodeModulesDirectory(test.path)) + }) + } +} diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 083a54862..e0a9608c6 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -9,9 +9,10 @@ import ( "path/filepath" "strings" + "github.com/anchore/syft/internal" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" - "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/event" @@ -21,23 +22,21 @@ import ( var unixSystemRuntimePrefixes = []string{ "/proc", - "/sys", "/dev", + "/sys", } -var ErrIgnoreIrregularFile = errors.New("ignoring irregular file type") - var _ FileResolver = (*directoryResolver)(nil) -type pathFilterFn func(string) bool +type pathFilterFn func(string, os.FileInfo) bool // directoryResolver implements path and content access for the directory data source. type directoryResolver struct { - path string - cwdRelativeToRoot string - cwd string - fileTree *filetree.FileTree - metadata map[file.ID]FileMetadata + path string + currentWdRelativeToRoot string + currentWd string + fileTree *filetree.FileTree + metadata map[file.ID]FileMetadata // TODO: wire up to report these paths in the json report pathFilterFns []pathFilterFn refsByMIMEType map[string][]file.Reference @@ -45,39 +44,41 @@ type directoryResolver struct { } func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryResolver, error) { - cwd, err := os.Getwd() + currentWd, err := os.Getwd() if err != nil { return nil, fmt.Errorf("could not create directory resolver: %w", err) } - var cwdRelRoot string + var currentWdRelRoot string if path.IsAbs(root) { - cwdRelRoot, err = filepath.Rel(cwd, root) + currentWdRelRoot, err = filepath.Rel(currentWd, root) if err != nil { return nil, fmt.Errorf("could not create directory resolver: %w", err) } + } else { + currentWdRelRoot = filepath.Clean(root) } if pathFilters == nil { - pathFilters = []pathFilterFn{isUnixSystemRuntimePath} + pathFilters = []pathFilterFn{isUnallowableFileType, isUnixSystemRuntimePath} } resolver := directoryResolver{ - path: root, - cwd: cwd, - cwdRelativeToRoot: cwdRelRoot, - fileTree: filetree.NewFileTree(), - metadata: make(map[file.ID]FileMetadata), - pathFilterFns: pathFilters, - refsByMIMEType: make(map[string][]file.Reference), - errPaths: make(map[string]error), + path: root, + currentWd: currentWd, + currentWdRelativeToRoot: currentWdRelRoot, + fileTree: filetree.NewFileTree(), + metadata: make(map[file.ID]FileMetadata), + pathFilterFns: pathFilters, + refsByMIMEType: make(map[string][]file.Reference), + errPaths: make(map[string]error), } return &resolver, indexAllRoots(root, resolver.indexTree) } func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) { - log.Infof("indexing filesystem path=%q", root) + log.Debugf("indexing filesystem path=%q", root) var roots []string var err error @@ -93,11 +94,8 @@ func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]st // but continue forth with index regardless if the given root path exists or not. fi, err := os.Stat(root) if err != nil && fi != nil && !fi.IsDir() { - newRoot, err := r.addPathToIndex(root, fi) - if err = r.handleFileAccessErr(root, err); err != nil { - return nil, fmt.Errorf("unable to index path: %w", err) - } - + // note: we want to index the path regardless of an error stat-ing the path + newRoot := r.indexPath(root, fi, nil) if newRoot != "" { roots = append(roots, newRoot) } @@ -108,102 +106,133 @@ func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]st func(path string, info os.FileInfo, err error) error { stager.Current = path - newRoot, indexErr := r.indexPath(path, info, err) + newRoot := r.indexPath(path, info, err) if newRoot != "" { roots = append(roots, newRoot) } - return indexErr + return nil }) } -func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) { +func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) string { // ignore any path which a filter function returns true for _, filterFn := range r.pathFilterFns { - if filterFn(path) { - return "", nil + if filterFn(path, info) { + return "" } } - if err = r.handleFileAccessErr(path, err); err != nil { - return "", err + if r.isFileAccessErr(path, err) { + return "" } // link cycles could cause a revisit --we should not allow this if r.fileTree.HasPath(file.Path(path)) { - return "", nil + return "" } if info == nil { // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) - return "", nil + return "" } newRoot, err := r.addPathToIndex(path, info) - if err = r.handleFileAccessErr(path, err); err != nil { - return "", fmt.Errorf("unable to index path: %w", err) + if r.isFileAccessErr(path, err) { + return "" } - return newRoot, nil + return newRoot } -func (r *directoryResolver) handleFileAccessErr(path string, err error) error { - if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) || errors.Is(err, ErrIgnoreIrregularFile) { - // don't allow for permission errors to stop indexing, keep track of the paths and continue. +func (r *directoryResolver) isFileAccessErr(path string, err error) bool { + // don't allow for errors to stop indexing, keep track of the paths and continue. + if err != nil { log.Warnf("unable to access path=%q: %+v", path, err) r.errPaths[path] = err - return nil - } else if err != nil { - return fmt.Errorf("unable to access path=%q: %w", path, err) + return true } - return nil + return false } func (r directoryResolver) addPathToIndex(p string, info os.FileInfo) (string, error) { - var ref *file.Reference - var err error - var newRoot string - - switch newFileTypeFromMode(info.Mode()) { + switch t := newFileTypeFromMode(info.Mode()); t { case SymbolicLink: - linkTarget, err := os.Readlink(p) - if err != nil { - return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) - } - ref, err = r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) - if err != nil { - return "", err - } - - targetAbsPath := linkTarget - if !filepath.IsAbs(targetAbsPath) { - targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) - } - - newRoot = targetAbsPath - + return r.addSymlinkToIndex(p, info) case Directory: - ref, err = r.fileTree.AddDir(file.Path(p)) - if err != nil { - return "", err - } - case IrregularFile: - return "", ErrIgnoreIrregularFile + return "", r.addDirectoryToIndex(p, info) + case RegularFile: + return "", r.addFileToIndex(p, info) default: - ref, err = r.fileTree.AddFile(file.Path(p)) - if err != nil { - return "", err - } + return "", fmt.Errorf("unsupported file type: %s", t) + } +} + +func (r directoryResolver) addDirectoryToIndex(p string, info os.FileInfo) error { + ref, err := r.fileTree.AddDir(file.Path(p)) + if err != nil { + return err } - metadata := fileMetadataFromPath(p, info) + location := NewLocationFromDirectory(p, *ref) + metadata := fileMetadataFromPath(p, info, r.isInIndex(location)) + r.addFileMetadataToIndex(ref, metadata) + + return nil +} + +func (r directoryResolver) addFileToIndex(p string, info os.FileInfo) error { + ref, err := r.fileTree.AddFile(file.Path(p)) + if err != nil { + return err + } + + location := NewLocationFromDirectory(p, *ref) + metadata := fileMetadataFromPath(p, info, r.isInIndex(location)) + r.addFileMetadataToIndex(ref, metadata) + + return nil +} + +func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string, error) { + var usedInfo = info + + linkTarget, err := os.Readlink(p) + if err != nil { + return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) + } + + // note: if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to resolve it relative to the directory + // in question (e.g. resolve to /dev/fd/2) + if !filepath.IsAbs(linkTarget) { + linkTarget = filepath.Join(filepath.Dir(p), linkTarget) + } + + ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) + if err != nil { + return "", err + } + + targetAbsPath := linkTarget + if !filepath.IsAbs(targetAbsPath) { + targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) + } + + location := NewLocationFromDirectory(p, *ref) + metadata := fileMetadataFromPath(p, usedInfo, r.isInIndex(location)) + r.addFileMetadataToIndex(ref, metadata) + + return targetAbsPath, nil +} + +func (r directoryResolver) addFileMetadataToIndex(ref *file.Reference, metadata FileMetadata) { if ref != nil { - r.refsByMIMEType[metadata.MIMEType] = append(r.refsByMIMEType[metadata.MIMEType], *ref) + if metadata.MIMEType != "" { + r.refsByMIMEType[metadata.MIMEType] = append(r.refsByMIMEType[metadata.MIMEType], *ref) + } + r.metadata[ref.ID()] = metadata } - - r.metadata[ref.ID()] = metadata - return newRoot, nil } func (r directoryResolver) requestPath(userPath string) (string, error) { @@ -212,7 +241,7 @@ func (r directoryResolver) requestPath(userPath string) (string, error) { userPath = path.Join(r.path, userPath) } else { // ensure we take into account any relative difference between the root path and the CWD for relative requests - userPath = path.Join(r.cwdRelativeToRoot, userPath) + userPath = path.Join(r.currentWdRelativeToRoot, userPath) } var err error @@ -227,7 +256,7 @@ func (r directoryResolver) responsePath(path string) string { // always return references relative to the request path (not absolute path) if filepath.IsAbs(path) { // we need to account for the cwd relative to the running process and the given root for the directory resolver - prefix := filepath.Clean(filepath.Join(r.cwd, r.cwdRelativeToRoot)) + prefix := filepath.Clean(filepath.Join(r.currentWd, r.currentWdRelativeToRoot)) return strings.TrimPrefix(path, prefix+string(filepath.Separator)) } return path @@ -283,8 +312,6 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) exists, ref, err := r.fileTree.File(file.Path(userStrPath)) if err == nil && exists { references = append(references, NewLocationFromDirectory(r.responsePath(userStrPath), *ref)) - } else { - log.Warnf("path (%s) not found in file tree: Exists: %t Err:%+v", userStrPath, exists, err) } } @@ -329,9 +356,22 @@ func (r directoryResolver) FileContentsByLocation(location Location) (io.ReadClo if location.ref.RealPath == "" { return nil, errors.New("empty path given") } + if !r.isInIndex(location) { + // this is in cases where paths have been explicitly excluded from the tree index. In which case + // we should DENY all content requests. Why? These paths have been indicated to be inaccessible (either + // by preference or these files are not readable by the current user). + return nil, fmt.Errorf("file content is inaccessible path=%q", location.ref.RealPath) + } return file.NewLazyReadCloser(string(location.ref.RealPath)), nil } +func (r directoryResolver) isInIndex(location Location) bool { + if location.ref.RealPath == "" { + return false + } + return r.fileTree.HasPath(location.ref.RealPath, filetree.FollowBasenameLinks) +} + func (r *directoryResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { @@ -364,10 +404,25 @@ func (r *directoryResolver) FilesByMIMEType(types ...string) ([]Location, error) return locations, nil } -func isUnixSystemRuntimePath(path string) bool { +func isUnixSystemRuntimePath(path string, _ os.FileInfo) bool { return internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) } +func isUnallowableFileType(_ string, info os.FileInfo) bool { + if info == nil { + // we can't filter out by filetype for non-existent files + return false + } + switch newFileTypeFromMode(info.Mode()) { + case CharacterDevice, Socket, BlockDevice, FIFONode, IrregularFile: + return true + // note: symlinks that point to these files may still get by. We handle this later in processing to help prevent + // against infinite links traversal. + } + + return false +} + func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error { // why account for multiple roots? To cover cases when there is a symlink that references above the root path, // in which case we need to additionally index where the link resolves to. it's for this reason why the filetree diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index 6951fdf5e..d820574b2 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -1,14 +1,15 @@ package source import ( + "io/fs" "io/ioutil" "os" "path" "path/filepath" "reflect" "strings" - "syscall" "testing" + "time" "github.com/stretchr/testify/require" @@ -19,6 +20,55 @@ import ( "github.com/wagoodman/go-progress" ) +func TestDirectoryResolver_FilesByPath_relativeRoot(t *testing.T) { + cases := []struct { + name string + relativeRoot string + input string + expected []string + }{ + { + name: "should find a file from an absolute input", + relativeRoot: "./test-fixtures/", + input: "/image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path", + relativeRoot: "./test-fixtures/", + input: "image-symlinks/file-1.txt", + expected: []string{ + "image-symlinks/file-1.txt", + }, + }, + { + name: "should find a file from a relative path (root above cwd)", + relativeRoot: "../", + input: "sbom/sbom.go", + expected: []string{ + "sbom/sbom.go", + }, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + resolver, err := newDirectoryResolver(c.relativeRoot) + assert.NoError(t, err) + + refs, err := resolver.FilesByPath(c.input) + require.NoError(t, err) + assert.Len(t, refs, len(c.expected)) + s := strset.New() + for _, actual := range refs { + s.Add(actual.RealPath) + } + assert.ElementsMatch(t, c.expected, s.List()) + }) + } +} + func TestDirectoryResolver_FilesByPath_absoluteRoot(t *testing.T) { cases := []struct { name string @@ -85,23 +135,16 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { { name: "finds a file (relative)", root: "./test-fixtures/", - input: "test-fixtures/image-symlinks/file-1.txt", - expected: "test-fixtures/image-symlinks/file-1.txt", + input: "image-symlinks/file-1.txt", + expected: "image-symlinks/file-1.txt", refCount: 1, }, { name: "finds a file with relative indirection", root: "./test-fixtures/../test-fixtures", - input: "test-fixtures/image-symlinks/file-1.txt", - expected: "test-fixtures/image-symlinks/file-1.txt", - refCount: 1, - }, - { - // note: this is asserting the old behavior is not supported - name: "relative lookup with wrong path fails", - root: "./test-fixtures/", input: "image-symlinks/file-1.txt", - refCount: 0, + expected: "image-symlinks/file-1.txt", + refCount: 1, }, { name: "managed non-existing files (relative)", @@ -113,7 +156,7 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { name: "finds a file (absolute)", root: "./test-fixtures/", input: "/image-symlinks/file-1.txt", - expected: "test-fixtures/image-symlinks/file-1.txt", + expected: "image-symlinks/file-1.txt", refCount: 1, }, { @@ -158,17 +201,17 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { }{ { name: "finds multiple files", - input: []string{"test-fixtures/image-symlinks/file-1.txt", "test-fixtures/image-symlinks/file-2.txt"}, + input: []string{"image-symlinks/file-1.txt", "image-symlinks/file-2.txt"}, refCount: 2, }, { name: "skips non-existing files", - input: []string{"test-fixtures/image-symlinks/bogus.txt", "test-fixtures/image-symlinks/file-1.txt"}, + input: []string{"image-symlinks/bogus.txt", "image-symlinks/file-1.txt"}, refCount: 1, }, { name: "does not return anything for non-existing directories", - input: []string{"test-fixtures/non-existing/bogus.txt", "test-fixtures/non-existing/file-1.txt"}, + input: []string{"non-existing/bogus.txt", "non-existing/file-1.txt"}, refCount: 0, }, } @@ -210,7 +253,7 @@ func TestDirectoryResolver_FilesByGlobSingle(t *testing.T) { assert.NoError(t, err) assert.Len(t, refs, 1) - assert.Equal(t, "test-fixtures/image-symlinks/file-1.txt", refs[0].RealPath) + assert.Equal(t, "image-symlinks/file-1.txt", refs[0].RealPath) } func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { @@ -218,95 +261,128 @@ func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { resolver, err := newDirectoryResolver("test-fixtures/system_paths/target") assert.NoError(t, err) // ensure the correct filter function is wired up by default - expectedFn := reflect.ValueOf(isUnixSystemRuntimePath) + expectedFn := reflect.ValueOf(isUnallowableFileType) actualFn := reflect.ValueOf(resolver.pathFilterFns[0]) assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer()) // all paths should be found (non filtering matches a path) - refs, err := resolver.FilesByGlob("**/place") + locations, err := resolver.FilesByGlob("**/place") assert.NoError(t, err) // 4: within target/ // 1: target/link --> relative path to "place" // 1: outside_root/link_target/place - assert.Len(t, refs, 6) + assert.Len(t, locations, 6) // ensure that symlink indexing outside of root worked + testLocation := "test-fixtures/system_paths/outside_root/link_target/place" ok := false - test_location := "test-fixtures/system_paths/outside_root/link_target/place" - for _, actual_loc := range refs { - if test_location == actual_loc.RealPath { + for _, location := range locations { + if strings.HasSuffix(location.RealPath, testLocation) { ok = true } } if !ok { - t.Fatalf("could not find test location=%q", test_location) + t.Fatalf("could not find test location=%q", testLocation) } } -func TestDirectoryResolverUsesPathFilterFunction(t *testing.T) { - // let's make certain that the index honors the filter function - filter := func(s string) bool { - // a dummy function that works for testing purposes - return strings.Contains(s, "dev/place") || strings.Contains(s, "proc/place") || strings.Contains(s, "sys/place") - } +var _ fs.FileInfo = (*testFileInfo)(nil) - resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", filter) - assert.NoError(t, err) - - // ensure the correct filter function is wired up by default - expectedFn := reflect.ValueOf(filter) - actualFn := reflect.ValueOf(resolver.pathFilterFns[0]) - assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer()) - assert.Len(t, resolver.pathFilterFns, 1) - - refs, err := resolver.FilesByGlob("**/place") - assert.NoError(t, err) - // target/home/place + target/link/.../place + outside_root/.../place - assert.Len(t, refs, 3) +type testFileInfo struct { + mode os.FileMode } -func Test_isUnixSystemRuntimePath(t *testing.T) { +func (t testFileInfo) Name() string { + panic("implement me") +} + +func (t testFileInfo) Size() int64 { + panic("implement me") +} + +func (t testFileInfo) Mode() fs.FileMode { + return t.mode +} + +func (t testFileInfo) ModTime() time.Time { + panic("implement me") +} + +func (t testFileInfo) IsDir() bool { + panic("implement me") +} + +func (t testFileInfo) Sys() interface{} { + panic("implement me") +} + +func Test_isUnallowableFileType(t *testing.T) { tests := []struct { - path string + name string + info os.FileInfo expected bool }{ { - path: "proc/place", + name: "regular file", + info: testFileInfo{ + mode: 0, + }, expected: false, }, { - path: "/proc/place", - expected: true, - }, - { - path: "/proc", - expected: true, - }, - { - path: "/pro/c", + name: "dir", + info: testFileInfo{ + mode: os.ModeDir, + }, expected: false, }, { - path: "/pro", + name: "symlink", + info: testFileInfo{ + mode: os.ModeSymlink, + }, expected: false, }, { - path: "/dev", + name: "socket", + info: testFileInfo{ + mode: os.ModeSocket, + }, expected: true, }, { - path: "/sys", + name: "named pipe", + info: testFileInfo{ + mode: os.ModeNamedPipe, + }, expected: true, }, { - path: "/something/sys", - expected: false, + name: "char device", + info: testFileInfo{ + mode: os.ModeCharDevice, + }, + expected: true, + }, + { + name: "block device", + info: testFileInfo{ + mode: os.ModeDevice, + }, + expected: true, + }, + { + name: "irregular", + info: testFileInfo{ + mode: os.ModeIrregular, + }, + expected: true, }, } for _, test := range tests { - t.Run(test.path, func(t *testing.T) { - assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path)) + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, isUnallowableFileType("dont/care", test.info)) }) } } @@ -345,9 +421,7 @@ func Test_directoryResolver_index(t *testing.T) { // note: the index uses absolute paths, so assertions MUST keep this in mind cwd, err := os.Getwd() - if err != nil { - t.Fatalf("could not get working dir: %+v", err) - } + require.NoError(t, err) p := file.Path(path.Join(cwd, test.path)) assert.Equal(t, true, r.fileTree.HasPath(p)) @@ -365,32 +439,27 @@ func Test_handleFileAccessErr(t *testing.T) { tests := []struct { name string input error - expectedErr error expectedPathTracked bool }{ { name: "permission error does not propagate", input: os.ErrPermission, expectedPathTracked: true, - expectedErr: nil, }, { name: "file does not exist error does not propagate", input: os.ErrNotExist, expectedPathTracked: true, - expectedErr: nil, }, { - name: "non-permission errors propagate", + name: "non-permission errors are tracked", input: os.ErrInvalid, - expectedPathTracked: false, - expectedErr: os.ErrInvalid, + expectedPathTracked: true, }, { name: "non-errors ignored", input: nil, expectedPathTracked: false, - expectedErr: nil, }, } @@ -400,7 +469,7 @@ func Test_handleFileAccessErr(t *testing.T) { errPaths: make(map[string]error), } p := "a/place" - assert.ErrorIs(t, r.handleFileAccessErr(p, test.input), test.expectedErr) + assert.Equal(t, r.isFileAccessErr(p, test.input), test.expectedPathTracked) _, exists := r.errPaths[p] assert.Equal(t, test.expectedPathTracked, exists) }) @@ -493,7 +562,7 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) { { fixturePath: "./test-fixtures/image-simple", mimeType: "text/plain", - expectedPaths: strset.New("test-fixtures/image-simple/file-1.txt", "test-fixtures/image-simple/file-2.txt", "test-fixtures/image-simple/target/really/nested/file-3.txt", "test-fixtures/image-simple/Dockerfile"), + expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"), }, } for _, test := range tests { @@ -511,33 +580,81 @@ func Test_directoryResolver_FilesByMIMEType(t *testing.T) { } } -func Test_ignoreIrregularFiles(t *testing.T) { - // NOTE: craeting a pipe/fifo file on demand since git doesn't let me - // commit one. The directory resolver should ignore it (and any other - // file that is neither regular, a symlink or directory) when indexing a directory - dir := "./test-fixtures/irregular-files" - f := "f.fifo" - - err := syscall.Mknod(filepath.Join(dir, f), syscall.S_IFIFO|0666, 0) - assert.NoError(t, err) - defer func() { - err := os.Remove(filepath.Join(dir, f)) - assert.NoError(t, err) - }() - - fileRefs, err := ioutil.ReadDir(dir) - assert.NoError(t, err) - assert.Len(t, fileRefs, 2) // two files (f.fifo and readme) - - resolver, err := newDirectoryResolver(dir) +func Test_IndexingNestedSymLinks(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple") assert.NoError(t, err) - assert.Len(t, resolver.fileTree.AllFiles(), 1) - rp := resolver.fileTree.AllFiles()[0].RealPath - assert.True(t, strings.Contains(string(rp), filepath.Join(dir, "readme"))) + // check that we can get the real path + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // check that we can access the same file via 1 symlink + locations, err = resolver.FilesByPath("./link_to_new_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // check that we can access the same file via 2 symlinks + locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) +} + +func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { + filterFn := func(path string, _ os.FileInfo) bool { + return strings.HasSuffix(path, string(filepath.Separator)+"readme") + } + + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", filterFn) + assert.NoError(t, err) + + var testingLocations []Location + + // the path to the real file is PRUNED from the index, so we should NOT expect a location returned + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + assert.Empty(t, locations) + + // check that we can access the same file via 1 symlink + locations, err = resolver.FilesByPath("./link_to_new_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + testingLocations = append(testingLocations, locations...) + + // check that we can access the same file via 2 symlinks + locations, err = resolver.FilesByPath("./link_to_link_to_new_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + testingLocations = append(testingLocations, locations...) + + // check that we CANNOT get contents from any of the link locations + for _, location := range testingLocations { + contentReader, err := resolver.FileContentsByLocation(location) + assert.Errorf(t, err, "expected an error for getting content from a location not in the index") + assert.Nil(t, contentReader) + } +} + +func Test_IndexingNestedSymLinksOutsideOfRoot(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-roots/root") + assert.NoError(t, err) + + // check that we can get the real path + locations, err := resolver.FilesByPath("./readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + + // check that we can access the same file via 2 symlinks (link_to_link_to_readme -> link_to_readme -> readme) + locations, err = resolver.FilesByPath("./link_to_link_to_readme") + require.NoError(t, err) + assert.Len(t, locations, 1) + } func Test_directoryResolver_FileContentsByLocation(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + tests := []struct { name string location Location @@ -547,7 +664,7 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { { name: "use file reference for content requests", location: NewLocationFromDirectory("some/place", file.Reference{ - RealPath: "test-fixtures/image-simple/file-1.txt", + RealPath: file.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt")), }), expects: "this file has contents", }, @@ -578,3 +695,79 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { }) } } + +func Test_isUnixSystemRuntimePath(t *testing.T) { + tests := []struct { + path string + expected bool + }{ + { + path: "proc/place", + expected: false, + }, + { + path: "/proc/place", + expected: true, + }, + { + path: "/proc", + expected: true, + }, + { + path: "/pro/c", + expected: false, + }, + { + path: "/pro", + expected: false, + }, + { + path: "/dev", + expected: true, + }, + { + path: "/sys", + expected: true, + }, + { + path: "/something/sys", + expected: false, + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path, nil)) + }) + } +} + +func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { + test := func(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-loop") + require.NoError(t, err) + + locations, err := resolver.FilesByGlob("**/file.target") + require.NoError(t, err) + // Note: I'm not certain that this behavior is correct, but it is not an infinite loop (which is the point of the test) + // - block/loop0/file.target + // - devices/loop0/file.target + // - devices/loop0/subsystem/loop0/file.target + assert.Len(t, locations, 3) + } + + testWithTimeout(t, 5*time.Second, test) +} + +func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) { + done := make(chan bool) + go func() { + test(t) + done <- true + }() + + select { + case <-time.After(timeout): + t.Fatal("test timed out") + case <-done: + } +} diff --git a/syft/source/file_metadata.go b/syft/source/file_metadata.go index 463c336bc..37c97d818 100644 --- a/syft/source/file_metadata.go +++ b/syft/source/file_metadata.go @@ -3,10 +3,10 @@ package source import ( "os" - "github.com/anchore/syft/internal/log" - "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/internal/log" + "github.com/anchore/stereoscope/pkg/image" ) @@ -37,19 +37,24 @@ func fileMetadataByLocation(img *image.Image, location Location) (FileMetadata, }, nil } -func fileMetadataFromPath(path string, info os.FileInfo) FileMetadata { +func fileMetadataFromPath(path string, info os.FileInfo, withMIMEType bool) FileMetadata { + var mimeType string uid, gid := GetXid(info) - f, err := os.Open(path) - if err != nil { - // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns - f = nil - } else { - defer func() { - if err := f.Close(); err != nil { - log.Warnf("unable to close file while obtaining metadata: %s", path) - } - }() + if withMIMEType { + f, err := os.Open(path) + if err != nil { + // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. In the future we need to track these as known-unknowns + f = nil + } else { + defer func() { + if err := f.Close(); err != nil { + log.Warnf("unable to close file while obtaining metadata: %s", path) + } + }() + } + + mimeType = file.MIMEType(f) } return FileMetadata{ @@ -58,6 +63,6 @@ func fileMetadataFromPath(path string, info os.FileInfo) FileMetadata { // unsupported across platforms UserID: uid, GroupID: gid, - MIMEType: file.MIMEType(f), + MIMEType: mimeType, } } diff --git a/syft/source/file_metadata_test.go b/syft/source/file_metadata_test.go new file mode 100644 index 000000000..db714d0f7 --- /dev/null +++ b/syft/source/file_metadata_test.go @@ -0,0 +1,54 @@ +package source + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_fileMetadataFromPath(t *testing.T) { + + tests := []struct { + path string + withMIMEType bool + expectedType string + expectedMIMEType string + }{ + { + path: "test-fixtures/symlinks-simple/readme", + withMIMEType: true, + expectedType: "RegularFile", + expectedMIMEType: "text/plain", + }, + { + path: "test-fixtures/symlinks-simple/link_to_new_readme", + withMIMEType: true, + expectedType: "SymbolicLink", + expectedMIMEType: "text/plain", + }, + { + path: "test-fixtures/symlinks-simple/readme", + withMIMEType: false, + expectedType: "RegularFile", + expectedMIMEType: "", + }, + { + path: "test-fixtures/symlinks-simple/link_to_new_readme", + withMIMEType: false, + expectedType: "SymbolicLink", + expectedMIMEType: "", + }, + } + for _, test := range tests { + t.Run(test.path, func(t *testing.T) { + info, err := os.Lstat(test.path) + require.NoError(t, err) + + actual := fileMetadataFromPath(test.path, info, test.withMIMEType) + assert.Equal(t, test.expectedMIMEType, actual.MIMEType) + assert.Equal(t, test.expectedType, string(actual.Type)) + }) + } +} diff --git a/syft/source/file_type.go b/syft/source/file_type.go index 4e5268fe5..370ea0f8d 100644 --- a/syft/source/file_type.go +++ b/syft/source/file_type.go @@ -6,8 +6,7 @@ import ( ) const ( - UnknownFileType FileType = "UnknownFileType" - RegularFile FileType = "RegularFile" + RegularFile FileType = "RegularFile" // IrregularFile is how syft defines files that are neither regular, symbolic or directory. // For ref: the seven standard Unix file types are regular, directory, symbolic link, // FIFO special, block special, character special, and socket as defined by POSIX. @@ -18,6 +17,7 @@ const ( BlockDevice FileType = "BlockDevice" Directory FileType = "Directory" FIFONode FileType = "FIFONode" + Socket FileType = "Socket" ) type FileType string @@ -39,19 +39,32 @@ func newFileTypeFromTarHeaderTypeFlag(flag byte) FileType { case tar.TypeFifo: return FIFONode } - return UnknownFileType + return IrregularFile } -// TODO: fill in more types from mod... func newFileTypeFromMode(mode os.FileMode) FileType { switch { - case mode&os.ModeSymlink == os.ModeSymlink: + case isSet(mode, os.ModeSymlink): return SymbolicLink + case isSet(mode, os.ModeIrregular): + return IrregularFile + case isSet(mode, os.ModeCharDevice): + return CharacterDevice + case isSet(mode, os.ModeDevice): + return BlockDevice + case isSet(mode, os.ModeNamedPipe): + return FIFONode + case isSet(mode, os.ModeSocket): + return Socket case mode.IsDir(): return Directory - case !mode.IsRegular(): - return IrregularFile - default: + case mode.IsRegular(): return RegularFile + default: + return IrregularFile } } + +func isSet(mode, field os.FileMode) bool { + return mode&field != 0 +} diff --git a/syft/source/source_test.go b/syft/source/source_test.go index 1b3dbd4f9..53cef9435 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -55,19 +55,19 @@ func TestNewFromDirectory(t *testing.T) { { desc: "path detected", input: "test-fixtures", - inputPaths: []string{"test-fixtures/path-detected/.vimrc"}, + inputPaths: []string{"path-detected/.vimrc"}, expRefs: 1, }, { desc: "directory ignored", input: "test-fixtures", - inputPaths: []string{"test-fixtures/path-detected"}, + inputPaths: []string{"path-detected"}, expRefs: 0, }, { desc: "no files-by-path detected", input: "test-fixtures", - inputPaths: []string{"test-fixtures/no-path-detected"}, + inputPaths: []string{"no-path-detected"}, expRefs: 0, }, } @@ -184,21 +184,21 @@ func TestNewFromDirectoryShared(t *testing.T) { desc: "path detected", input: "test-fixtures", notExist: "foobar/", - inputPaths: []string{"test-fixtures/path-detected/.vimrc"}, + inputPaths: []string{"path-detected/.vimrc"}, expRefs: 1, }, { desc: "directory ignored", input: "test-fixtures", notExist: "foobar/", - inputPaths: []string{"test-fixtures/path-detected"}, + inputPaths: []string{"path-detected"}, expRefs: 0, }, { desc: "no files-by-path detected", input: "test-fixtures", notExist: "foobar/", - inputPaths: []string{"test-fixtures/no-path-detected"}, + inputPaths: []string{"no-path-detected"}, expRefs: 0, }, } @@ -306,7 +306,9 @@ func TestFilesByGlob(t *testing.T) { t.Errorf("could not get resolver error: %+v", err) } contents, err := resolver.FilesByGlob(test.glob) - + if err != nil { + t.Errorf("could not get files by glob: %s+v", err) + } if len(contents) != test.expected { t.Errorf("unexpected number of files found by glob (%s): %d != %d", test.glob, len(contents), test.expected) } diff --git a/syft/source/test-fixtures/symlinks-loop/README.md b/syft/source/test-fixtures/symlinks-loop/README.md new file mode 100644 index 000000000..71967bddd --- /dev/null +++ b/syft/source/test-fixtures/symlinks-loop/README.md @@ -0,0 +1 @@ +this mimics a partial layout on a linux system within /sys/devices/virtual to help with ensuring globbing for files does not end up in a infinite loop diff --git a/syft/source/test-fixtures/symlinks-loop/block/loop0 b/syft/source/test-fixtures/symlinks-loop/block/loop0 new file mode 120000 index 000000000..3e5e8ddf7 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-loop/block/loop0 @@ -0,0 +1 @@ +../devices/loop0 \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-loop/devices/loop0/file.target b/syft/source/test-fixtures/symlinks-loop/devices/loop0/file.target new file mode 100644 index 000000000..cb3e22033 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-loop/devices/loop0/file.target @@ -0,0 +1 @@ +contents! \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-loop/devices/loop0/subsystem b/syft/source/test-fixtures/symlinks-loop/devices/loop0/subsystem new file mode 120000 index 000000000..f33eb2721 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-loop/devices/loop0/subsystem @@ -0,0 +1 @@ +../../block \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-roots/outside/link_to_readme b/syft/source/test-fixtures/symlinks-roots/outside/link_to_readme new file mode 120000 index 000000000..0dae899c0 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-roots/outside/link_to_readme @@ -0,0 +1 @@ +../root/readme \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-roots/root/link_to_link_to_readme b/syft/source/test-fixtures/symlinks-roots/root/link_to_link_to_readme new file mode 120000 index 000000000..6c91014fd --- /dev/null +++ b/syft/source/test-fixtures/symlinks-roots/root/link_to_link_to_readme @@ -0,0 +1 @@ +../outside/link_to_readme \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-roots/root/readme b/syft/source/test-fixtures/symlinks-roots/root/readme new file mode 100644 index 000000000..cb3e22033 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-roots/root/readme @@ -0,0 +1 @@ +contents! \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-simple/link_to_link_to_new_readme b/syft/source/test-fixtures/symlinks-simple/link_to_link_to_new_readme new file mode 120000 index 000000000..1ab249f74 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-simple/link_to_link_to_new_readme @@ -0,0 +1 @@ +./link_to_new_readme \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-simple/link_to_new_readme b/syft/source/test-fixtures/symlinks-simple/link_to_new_readme new file mode 120000 index 000000000..c944df565 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-simple/link_to_new_readme @@ -0,0 +1 @@ +./readme \ No newline at end of file diff --git a/syft/source/test-fixtures/irregular-files/readme b/syft/source/test-fixtures/symlinks-simple/readme similarity index 100% rename from syft/source/test-fixtures/irregular-files/readme rename to syft/source/test-fixtures/symlinks-simple/readme diff --git a/test/cli/dir_root_scan_regression_test.go b/test/cli/dir_root_scan_regression_test.go index 284447629..88d5ce0d5 100644 --- a/test/cli/dir_root_scan_regression_test.go +++ b/test/cli/dir_root_scan_regression_test.go @@ -24,7 +24,7 @@ func TestDirectoryScanCompletesWithinTimeout(t *testing.T) { select { case <-done: break - case <-time.After(5 * time.Second): + case <-time.After(10 * time.Second): t.Fatalf("directory scan is taking too long") }