From 3ac95ac4f6c663ce3e01f8bbb7b08d669ce77179 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 2 Nov 2021 12:09:06 -0400 Subject: [PATCH] Allow for cataloging a single file (#608) * allow for cataloging a single file Signed-off-by: Alex Goodman * use all catalogers for file schemes Signed-off-by: Alex Goodman --- syft/lib.go | 3 + syft/pkg/cataloger/cataloger.go | 19 ++++++ syft/source/directory_resolver.go | 96 +++++++++++++++++++++---------- syft/source/scheme.go | 46 +++++++++------ syft/source/scheme_test.go | 36 +++++++++++- syft/source/source.go | 33 ++++++++++- test/cli/packages_cmd_test.go | 10 ++++ test/cli/trait_assertions_test.go | 10 ++++ 8 files changed, 199 insertions(+), 54 deletions(-) diff --git a/syft/lib.go b/syft/lib.go index 4b76a9900..ccf85cd6b 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -52,6 +52,9 @@ func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, *dis case source.ImageScheme: log.Info("cataloging image") catalogers = cataloger.ImageCatalogers() + case source.FileScheme: + log.Info("cataloging file") + catalogers = cataloger.AllCatalogers() case source.DirectoryScheme: log.Info("cataloging directory") catalogers = cataloger.DirectoryCatalogers() diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go index 992f0e14f..a92a2dfbe 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -59,3 +59,22 @@ func DirectoryCatalogers() []Cataloger { rust.NewCargoLockCataloger(), } } + +// AllCatalogers returns all implemented catalogers +func AllCatalogers() []Cataloger { + return []Cataloger{ + ruby.NewGemFileLockCataloger(), + ruby.NewGemSpecCataloger(), + python.NewPythonIndexCataloger(), + python.NewPythonPackageCataloger(), + javascript.NewJavascriptLockCataloger(), + javascript.NewJavascriptPackageCataloger(), + deb.NewDpkgdbCataloger(), + rpmdb.NewRpmdbCataloger(), + java.NewJavaCataloger(), + apkdb.NewApkdbCataloger(), + golang.NewGoModuleBinaryCataloger(), 
+ golang.NewGoModFileCataloger(), + rust.NewCargoLockCataloger(), + } +} diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go index 516981dc6..e5ef80aad 100644 --- a/syft/source/directory_resolver.go +++ b/syft/source/directory_resolver.go @@ -66,52 +66,76 @@ func newDirectoryResolver(root string, pathFilters ...pathFilterFn) (*directoryR func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) { log.Infof("indexing filesystem path=%q", root) + + var roots []string var err error + root, err = filepath.Abs(root) if err != nil { return nil, err } - var roots []string + + // we want to be able to index single files with the directory resolver. However, we should also allow for attempting + // to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing). + // For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root, + // but continue on with indexing regardless of whether the given root path exists. 
+ fi, err := os.Stat(root) + if err != nil && fi != nil && !fi.IsDir() { + newRoot, err := r.addPathToIndex(root, fi) + if err = r.handleFileAccessErr(root, err); err != nil { + return nil, fmt.Errorf("unable to index path: %w", err) + } + + if newRoot != "" { + roots = append(roots, newRoot) + } + return roots, nil + } return roots, filepath.Walk(root, func(path string, info os.FileInfo, err error) error { stager.Current = path - // ignore any path which a filter function returns true - for _, filterFn := range r.pathFilterFns { - if filterFn(path) { - return nil - } - } - - if err = r.handleFileAccessErr(path, err); err != nil { - return err - } - - // link cycles could cause a revisit --we should not allow this - if r.fileTree.HasPath(file.Path(path)) { - return nil - } - - if info == nil { - // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. - r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) - return nil - } - - newRoot, err := r.addPathToIndex(path, info) - if err = r.handleFileAccessErr(path, err); err != nil { - return fmt.Errorf("unable to index path: %w", err) - } - + newRoot, indexErr := r.indexPath(path, info, err) if newRoot != "" { roots = append(roots, newRoot) } - return nil + return indexErr }) } +func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) { + // ignore any path which a filter function returns true + for _, filterFn := range r.pathFilterFns { + if filterFn(path) { + return "", nil + } + } + + if err = r.handleFileAccessErr(path, err); err != nil { + return "", err + } + + // link cycles could cause a revisit --we should not allow this + if r.fileTree.HasPath(file.Path(path)) { + return "", nil + } + + if info == nil { + // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. 
+ r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) + return "", nil + } + + newRoot, err := r.addPathToIndex(path, info) + if err = r.handleFileAccessErr(path, err); err != nil { + return "", fmt.Errorf("unable to index path: %w", err) + } + + return newRoot, nil +} + func (r *directoryResolver) handleFileAccessErr(path string, err error) error { if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { // don't allow for permission errors to stop indexing, keep track of the paths and continue. @@ -213,12 +237,22 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) log.Warnf("unable to get file by path=%q : %+v", userPath, err) continue } + // TODO: why not use stored metadata? fileMeta, err := os.Stat(userStrPath) - if os.IsNotExist(err) { + if errors.Is(err, os.ErrNotExist) { + // note: there are kinds of errors other than os.ErrNotExist that may be returned that are platform + // specific, but essentially hint at the same overall problem (that the path does not exist). Such an + // error could be syscall.ENOTDIR (see https://github.com/golang/go/issues/18974). continue } else if err != nil { - log.Warnf("path (%r) is not valid: %+v", userStrPath, err) + // we don't want to consider any other syscall errors that may hint at non-existence of the file/dir as + // invalid paths. This logging statement is meant to raise IO or permissions related problems. 
+ var pathErr *os.PathError + if !errors.As(err, &pathErr) { + log.Warnf("path is not valid (%s): %+v", userStrPath, err) + } + continue } // don't consider directories diff --git a/syft/source/scheme.go b/syft/source/scheme.go index 94c3b433a..ded508003 100644 --- a/syft/source/scheme.go +++ b/syft/source/scheme.go @@ -19,41 +19,49 @@ const ( DirectoryScheme Scheme = "DirectoryScheme" // ImageScheme indicates the source being cataloged is a container image ImageScheme Scheme = "ImageScheme" + // FileScheme indicates the source being cataloged is a single file + FileScheme Scheme = "FileScheme" ) func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, image.Source, string, error) { - if strings.HasPrefix(userInput, "dir:") { - // blindly trust the user's scheme + switch { + case strings.HasPrefix(userInput, "dir:"): dirLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "dir:")) if err != nil { return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) } return DirectoryScheme, image.UnknownSource, dirLocation, nil + + case strings.HasPrefix(userInput, "file:"): + fileLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "file:")) + if err != nil { + return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) + } + return FileScheme, image.UnknownSource, fileLocation, nil } - // we should attempt to let stereoscope determine what the source is first --but, just because the source is a valid directory - // doesn't mean we yet know if it is an OCI layout directory (to be treated as an image) or if it is a generic filesystem directory. + // try the most specific sources first and move out towards more generic sources. 
+ + // first: let's try the image detector, which has more scheme parsing internal to stereoscope source, imageSpec, err := imageDetector(userInput) - if err != nil { - return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to detect the scheme from %q: %w", userInput, err) + if err == nil && source != image.UnknownSource { + return ImageScheme, source, imageSpec, nil } - if source == image.UnknownSource { - dirLocation, err := homedir.Expand(userInput) - if err != nil { - return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err) - } + // next: let's try more generic sources (dir, file, etc.) - fileMeta, err := fs.Stat(dirLocation) - if err != nil { - return UnknownScheme, source, "", nil - } + location, err := homedir.Expand(userInput) + if err != nil { + return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err) + } - if fileMeta.IsDir() { - return DirectoryScheme, source, dirLocation, nil - } + fileMeta, err := fs.Stat(location) + if err != nil { return UnknownScheme, source, "", nil } - return ImageScheme, source, imageSpec, nil + if fileMeta.IsDir() { + return DirectoryScheme, source, location, nil + } + return FileScheme, source, location, nil } diff --git a/syft/source/scheme_test.go b/syft/source/scheme_test.go index e96b01d1e..db74c89b4 100644 --- a/syft/source/scheme_test.go +++ b/syft/source/scheme_test.go @@ -21,6 +21,7 @@ func TestDetectScheme(t *testing.T) { name string userInput string dirs []string + files []string detection detectorResult expectedScheme Scheme expectedLocation string @@ -152,6 +153,28 @@ func TestDetectScheme(t *testing.T) { expectedScheme: DirectoryScheme, expectedLocation: "some/path-to-dir", }, + { + name: "explicit-file", + userInput: "file:some/path-to-file", + detection: detectorResult{ + src: image.UnknownSource, + ref: "", + }, + files: []string{"some/path-to-file"}, + expectedScheme: FileScheme, + 
expectedLocation: "some/path-to-file", + }, + { + name: "implicit-file", + userInput: "some/path-to-file", + detection: detectorResult{ + src: image.UnknownSource, + ref: "", + }, + files: []string{"some/path-to-file"}, + expectedScheme: FileScheme, + expectedLocation: "some/path-to-file", + }, { name: "explicit-current-dir", userInput: "dir:.", @@ -225,7 +248,18 @@ func TestDetectScheme(t *testing.T) { } err = fs.Mkdir(expandedExpectedLocation, os.ModePerm) if err != nil { - t.Fatalf("failed to create dummy tar: %+v", err) + t.Fatalf("failed to create dummy dir: %+v", err) + } + } + + for _, p := range test.files { + expandedExpectedLocation, err := homedir.Expand(p) + if err != nil { + t.Fatalf("unable to expand path=%q: %+v", p, err) + } + _, err = fs.Create(expandedExpectedLocation) + if err != nil { + t.Fatalf("failed to create dummy file: %+v", err) } } diff --git a/syft/source/source.go b/syft/source/source.go index 951de5549..9c798aaff 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -34,6 +34,22 @@ func New(userInput string, registryOptions *image.RegistryOptions) (*Source, fun } switch parsedScheme { + case FileScheme: + fileMeta, err := fs.Stat(location) + if err != nil { + return &Source{}, func() {}, fmt.Errorf("unable to stat dir=%q: %w", location, err) + } + + if fileMeta.IsDir() { + return &Source{}, func() {}, fmt.Errorf("given path is not a directory (path=%q): %w", location, err) + } + + s, err := NewFromFile(location) + if err != nil { + return &Source{}, func() {}, fmt.Errorf("could not populate source from path=%q: %w", location, err) + } + return &s, func() {}, nil + case DirectoryScheme: fileMeta, err := fs.Stat(location) if err != nil { @@ -79,6 +95,17 @@ func NewFromDirectory(path string) (Source, error) { }, nil } +// NewFromFile creates a new source object tailored to catalog a single file. 
+func NewFromFile(path string) (Source, error) { + return Source{ + Mutex: &sync.Mutex{}, + Metadata: Metadata{ + Scheme: FileScheme, + Path: path, + }, + }, nil +} + // NewFromImage creates a new source object tailored to catalog a given container image, relative to the // option given (e.g. all-layers, squashed, etc) func NewFromImage(img *image.Image, userImageStr string) (Source, error) { @@ -97,15 +124,15 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) { func (s *Source) FileResolver(scope Scope) (FileResolver, error) { switch s.Metadata.Scheme { - case DirectoryScheme: + case DirectoryScheme, FileScheme: s.Mutex.Lock() defer s.Mutex.Unlock() if s.DirectoryResolver == nil { - directoryResolver, err := newDirectoryResolver(s.Metadata.Path) + resolver, err := newDirectoryResolver(s.Metadata.Path) if err != nil { return nil, err } - s.DirectoryResolver = directoryResolver + s.DirectoryResolver = resolver } return s.DirectoryResolver, nil case ImageScheme: diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index 0b2141cc8..48762a6e7 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -130,6 +130,16 @@ func TestPackagesCmdFlags(t *testing.T) { assertFailingReturnCode, // upload can't go anywhere, so if this passes that would be surprising }, }, + { + // we want to make certain that syft can catalog a single go binary and get a SBOM report that is not empty + name: "catalog-single-go-binary", + args: []string{"packages", "-o", "json", getSyftBinaryLocation(t)}, + assertions: []traitAssertion{ + assertJsonReport, + assertStdoutLengthGreaterThan(1000), + assertSuccessfulReturnCode, + }, + }, } for _, test := range tests { diff --git a/test/cli/trait_assertions_test.go b/test/cli/trait_assertions_test.go index 90728aae1..dcfce76d1 100644 --- a/test/cli/trait_assertions_test.go +++ b/test/cli/trait_assertions_test.go @@ -75,6 +75,16 @@ func assertInOutput(data string) traitAssertion { } } 
+func assertStdoutLengthGreaterThan(length uint) traitAssertion { + return func(tb testing.TB, stdout, _ string, _ int) { + tb.Helper() + if uint(len(stdout)) < length { + tb.Errorf("not enough output (expected at least %d, got %d)", length, len(stdout)) + + } + } +} + func assertFailingReturnCode(tb testing.TB, _, _ string, rc int) { tb.Helper() if rc == 0 {