Allow for cataloging a single file (#608)

* allow for cataloging a single file

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* use all catalogers for file schemes

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
2026-02-14 19:46:42 +01:00 · 2021-11-02 12:09:06 -04:00 · 2021-11-02 12:09:06 -04:00 · 3ac95ac4f6
commit 3ac95ac4f6
parent a2882ee810
8 changed files with 199 additions and 54 deletions
--- a/syft/lib.go
+++ b/syft/lib.go
@ -52,6 +52,9 @@ func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, *dis
 	case source.ImageScheme:
 		log.Info("cataloging image")
 		catalogers = cataloger.ImageCatalogers()
+	case source.FileScheme:
+		log.Info("cataloging file")
+		catalogers = cataloger.AllCatalogers()
 	case source.DirectoryScheme:
 		log.Info("cataloging directory")
 		catalogers = cataloger.DirectoryCatalogers()
--- a/syft/pkg/cataloger/cataloger.go
+++ b/syft/pkg/cataloger/cataloger.go
@ -59,3 +59,22 @@ func DirectoryCatalogers() []Cataloger {
 		rust.NewCargoLockCataloger(),
 	}
 }
+
+// AllCatalogers returns all implemented catalogers; this is the set used when cataloging a single file.
+func AllCatalogers() []Cataloger {
+	return []Cataloger{
+		ruby.NewGemFileLockCataloger(),
+		ruby.NewGemSpecCataloger(),
+		python.NewPythonIndexCataloger(),
+		python.NewPythonPackageCataloger(),
+		javascript.NewJavascriptLockCataloger(),
+		javascript.NewJavascriptPackageCataloger(),
+		deb.NewDpkgdbCataloger(),
+		rpmdb.NewRpmdbCataloger(),
+		java.NewJavaCataloger(),
+		apkdb.NewApkdbCataloger(),
+		golang.NewGoModuleBinaryCataloger(),
+		golang.NewGoModFileCataloger(),
+		rust.NewCargoLockCataloger(),
+	}
+}
--- a/syft/source/directory_resolver.go
+++ b/syft/source/directory_resolver.go
@ -66,52 +66,76 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR

 func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) {
 	log.Infof("indexing filesystem path=%q", root)
+
+	var roots []string
 	var err error
+
 	root, err = filepath.Abs(root)
 	if err != nil {
 		return nil, err
 	}
-	var roots []string
+
+	// we want to be able to index single files with the directory resolver. However, we should also allow for attempting
+	// to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing).
+	// For this reason a single root is only added when the stat succeeds (err == nil) and the root is not a directory;
+	// in every other case (including a root that does not exist) we continue on to walk the path as usual.
+	fi, err := os.Stat(root)
+	if err == nil && fi != nil && !fi.IsDir() {
+		newRoot, err := r.addPathToIndex(root, fi)
+		if err = r.handleFileAccessErr(root, err); err != nil {
+			return nil, fmt.Errorf("unable to index path: %w", err)
+		}
+
+		if newRoot != "" {
+			roots = append(roots, newRoot)
+		}
+		return roots, nil
+	}

 	return roots, filepath.Walk(root,
 		func(path string, info os.FileInfo, err error) error {
 			stager.Current = path

-			// ignore any path which a filter function returns true
-			for _, filterFn := range r.pathFilterFns {
-				if filterFn(path) {
-					return nil
-				}
-			}
-
-			if err = r.handleFileAccessErr(path, err); err != nil {
-				return err
-			}
-
-			// link cycles could cause a revisit --we should not allow this
-			if r.fileTree.HasPath(file.Path(path)) {
-				return nil
-			}
-
-			if info == nil {
-				// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
-				r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
-				return nil
-			}
-
-			newRoot, err := r.addPathToIndex(path, info)
-			if err = r.handleFileAccessErr(path, err); err != nil {
-				return fmt.Errorf("unable to index path: %w", err)
-			}
-
+			newRoot, indexErr := r.indexPath(path, info, err)
 			if newRoot != "" {
 				roots = append(roots, newRoot)
 			}

-			return nil
+			return indexErr
 		})
 }

+// indexPath indexes a single path as visited by the filepath.Walk callback in indexTree and returns the value
+// reported by addPathToIndex. An empty string is returned when the path is skipped (it matches a path filter, is
+// already in the file tree, or has no file info, which is recorded in errPaths) or when an error is returned.
+// err is the access error handed to the walk callback, if any; it is passed through handleFileAccessErr and only an
+// error that survives that call is returned to the caller.
+func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) {
+	// ignore any path which a filter function returns true
+	for _, filterFn := range r.pathFilterFns {
+		if filterFn(path) {
+			return "", nil
+		}
+	}
+
+	if err = r.handleFileAccessErr(path, err); err != nil {
+		return "", err
+	}
+
+	// link cycles could cause a revisit --we should not allow this
+	if r.fileTree.HasPath(file.Path(path)) {
+		return "", nil
+	}
+
+	if info == nil {
+		// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
+		r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
+		return "", nil
+	}
+
+	newRoot, err := r.addPathToIndex(path, info)
+	if err = r.handleFileAccessErr(path, err); err != nil {
+		return "", fmt.Errorf("unable to index path: %w", err)
+	}
+
+	return newRoot, nil
+}
+
 func (r *directoryResolver) handleFileAccessErr(path string, err error) error {
 	if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
 		// don't allow for permission errors to stop indexing, keep track of the paths and continue.
@ -213,12 +237,22 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error)
 			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
 			continue
 		}
+
 		// TODO: why not use stored metadata?
 		fileMeta, err := os.Stat(userStrPath)
-		if os.IsNotExist(err) {
+		if errors.Is(err, os.ErrNotExist) {
+			// note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform
+			// specific, but essentially hints at the same overall problem (that the path does not exist). Such an
+			// error could be syscall.ENOTDIR (see https://github.com/golang/go/issues/18974).
 			continue
 		} else if err != nil {
-			log.Warnf("path (%r) is not valid: %+v", userStrPath, err)
+			// we don't want to consider any other syscalls that may hint at non-existence of the file/dir as
+			// invalid paths. This logging statement is meant to raise IO or permissions related problems.
+			var pathErr *os.PathError
+			if !errors.As(err, &pathErr) {
+				log.Warnf("path is not valid (%s): %+v", userStrPath, err)
+			}
+			continue
 		}

 		// don't consider directories
--- a/syft/source/scheme.go
+++ b/syft/source/scheme.go
@ -19,41 +19,49 @@ const (
 	DirectoryScheme Scheme = "DirectoryScheme"
 	// ImageScheme indicates the source being cataloged is a container image
 	ImageScheme Scheme = "ImageScheme"
+	// FileScheme indicates the source being cataloged is a single file
+	FileScheme Scheme = "FileScheme"
 )

 func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, image.Source, string, error) {
-	if strings.HasPrefix(userInput, "dir:") {
-		// blindly trust the user's scheme
+	switch {
+	case strings.HasPrefix(userInput, "dir:"):
 		dirLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "dir:"))
 		if err != nil {
 			return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err)
 		}
 		return DirectoryScheme, image.UnknownSource, dirLocation, nil
+
+	case strings.HasPrefix(userInput, "file:"):
+		fileLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "file:"))
+		if err != nil {
+			return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand file path: %w", err)
+		}
+		return FileScheme, image.UnknownSource, fileLocation, nil
 	}

-	// we should attempt to let stereoscope determine what the source is first --but, just because the source is a valid directory
-	// doesn't mean we yet know if it is an OCI layout directory (to be treated as an image) or if it is a generic filesystem directory.
+	// try the most specific sources first and move out towards more generic sources.
+
+	// first: let's try the image detector, which has more scheme parsing internal to stereoscope
 	source, imageSpec, err := imageDetector(userInput)
-	if err != nil {
-		return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to detect the scheme from %q: %w", userInput, err)
+	if err == nil && source != image.UnknownSource {
+		return ImageScheme, source, imageSpec, nil
 	}

-	if source == image.UnknownSource {
-		dirLocation, err := homedir.Expand(userInput)
-		if err != nil {
-			return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err)
-		}
+	// next: let's try more generic sources (dir, file, etc.)

-		fileMeta, err := fs.Stat(dirLocation)
-		if err != nil {
-			return UnknownScheme, source, "", nil
-		}
+	location, err := homedir.Expand(userInput)
+	if err != nil {
+		return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err)
+	}

-		if fileMeta.IsDir() {
-			return DirectoryScheme, source, dirLocation, nil
-		}
+	fileMeta, err := fs.Stat(location)
+	if err != nil {
 		return UnknownScheme, source, "", nil
 	}

-	return ImageScheme, source, imageSpec, nil
+	if fileMeta.IsDir() {
+		return DirectoryScheme, source, location, nil
+	}
+	return FileScheme, source, location, nil
 }
--- a/syft/source/scheme_test.go
+++ b/syft/source/scheme_test.go
@ -21,6 +21,7 @@ func TestDetectScheme(t *testing.T) {
 		name             string
 		userInput        string
 		dirs             []string
+		files            []string
 		detection        detectorResult
 		expectedScheme   Scheme
 		expectedLocation string
@ -152,6 +153,28 @@ func TestDetectScheme(t *testing.T) {
 			expectedScheme:   DirectoryScheme,
 			expectedLocation: "some/path-to-dir",
 		},
+		{
+			name:      "explicit-file",
+			userInput: "file:some/path-to-file",
+			detection: detectorResult{
+				src: image.UnknownSource,
+				ref: "",
+			},
+			files:            []string{"some/path-to-file"},
+			expectedScheme:   FileScheme,
+			expectedLocation: "some/path-to-file",
+		},
+		{
+			name:      "implicit-file",
+			userInput: "some/path-to-file",
+			detection: detectorResult{
+				src: image.UnknownSource,
+				ref: "",
+			},
+			files:            []string{"some/path-to-file"},
+			expectedScheme:   FileScheme,
+			expectedLocation: "some/path-to-file",
+		},
 		{
 			name:      "explicit-current-dir",
 			userInput: "dir:.",
@ -225,7 +248,18 @@ func TestDetectScheme(t *testing.T) {
 				}
 				err = fs.Mkdir(expandedExpectedLocation, os.ModePerm)
 				if err != nil {
-					t.Fatalf("failed to create dummy tar: %+v", err)
+					t.Fatalf("failed to create dummy dir: %+v", err)
+				}
+			}
+
+			for _, p := range test.files {
+				expandedExpectedLocation, err := homedir.Expand(p)
+				if err != nil {
+					t.Fatalf("unable to expand path=%q: %+v", p, err)
+				}
+				_, err = fs.Create(expandedExpectedLocation)
+				if err != nil {
+					t.Fatalf("failed to create dummy file: %+v", err)
 				}
 			}

--- a/syft/source/source.go
+++ b/syft/source/source.go
@ -34,6 +34,22 @@ func New(userInput string, registryOptions *image.RegistryOptions) (*Source, fun
 	}

 	switch parsedScheme {
+	case FileScheme: // a single file: the location must exist and must not be a directory
+		fileMeta, err := fs.Stat(location)
+		if err != nil {
+			return &Source{}, func() {}, fmt.Errorf("unable to stat file=%q: %w", location, err)
+		}
+
+		if fileMeta.IsDir() { // stat succeeded, so err is nil here and there is nothing to wrap
+			return &Source{}, func() {}, fmt.Errorf("given path is a directory, not a file (path=%q)", location)
+		}
+
+		s, err := NewFromFile(location)
+		if err != nil {
+			return &Source{}, func() {}, fmt.Errorf("could not populate source from path=%q: %w", location, err)
+		}
+		return &s, func() {}, nil
+
 	case DirectoryScheme:
 		fileMeta, err := fs.Stat(location)
 		if err != nil {
@ -79,6 +95,17 @@ func NewFromDirectory(path string) (Source, error) {
 	}, nil
 }

+// NewFromFile creates a new source object tailored to catalog a single file at the given path.
+func NewFromFile(path string) (Source, error) {
+	return Source{
+		Mutex: &sync.Mutex{},
+		Metadata: Metadata{
+			Scheme: FileScheme,
+			Path:   path,
+		},
+	}, nil
+}
+
 // NewFromImage creates a new source object tailored to catalog a given container image, relative to the
 // option given (e.g. all-layers, squashed, etc)
 func NewFromImage(img *image.Image, userImageStr string) (Source, error) {
@ -97,15 +124,15 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) {

 func (s *Source) FileResolver(scope Scope) (FileResolver, error) {
 	switch s.Metadata.Scheme {
-	case DirectoryScheme:
+	case DirectoryScheme, FileScheme:
 		s.Mutex.Lock()
 		defer s.Mutex.Unlock()
 		if s.DirectoryResolver == nil {
-			directoryResolver, err := newDirectoryResolver(s.Metadata.Path)
+			resolver, err := newDirectoryResolver(s.Metadata.Path)
 			if err != nil {
 				return nil, err
 			}
-			s.DirectoryResolver = directoryResolver
+			s.DirectoryResolver = resolver
 		}
 		return s.DirectoryResolver, nil
 	case ImageScheme:
--- a/test/cli/packages_cmd_test.go
+++ b/test/cli/packages_cmd_test.go
@ -130,6 +130,16 @@ func TestPackagesCmdFlags(t *testing.T) {
 				assertFailingReturnCode, // upload can't go anywhere, so if this passes that would be surprising
 			},
 		},
+		{
+			// we want to make certain that syft can catalog a single go binary and get a SBOM report that is not empty
+			name: "catalog-single-go-binary",
+			args: []string{"packages", "-o", "json", getSyftBinaryLocation(t)},
+			assertions: []traitAssertion{
+				assertJsonReport,
+				assertStdoutLengthGreaterThan(1000),
+				assertSuccessfulReturnCode,
+			},
+		},
 	}

 	for _, test := range tests {
--- a/test/cli/trait_assertions_test.go
+++ b/test/cli/trait_assertions_test.go
@ -75,6 +75,16 @@ func assertInOutput(data string) traitAssertion {
 	}
 }

+// assertStdoutLengthGreaterThan returns an assertion that fails unless stdout is strictly longer than length bytes.
+func assertStdoutLengthGreaterThan(length uint) traitAssertion {
+	return func(tb testing.TB, stdout, _ string, _ int) {
+		tb.Helper()
+		if uint(len(stdout)) <= length {
+			tb.Errorf("not enough output (expected more than %d, got %d)", length, len(stdout))
+		}
+	}
+}
+
 func assertFailingReturnCode(tb testing.TB, _, _ string, rc int) {
 	tb.Helper()
 	if rc == 0 {