From 6cd9c2b77150168099d797ed1b981b92a5102c2c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 21 Oct 2020 10:49:33 -0400 Subject: [PATCH 1/2] upgrade cataloger interface to use full resolver (remove SelectFiles function) Signed-off-by: Alex Goodman --- syft/cataloger/catalog.go | 23 ++------- syft/cataloger/cataloger.go | 7 +-- syft/cataloger/common/generic_cataloger.go | 49 +++++++++++-------- .../common/generic_cataloger_test.go | 34 +++++-------- 4 files changed, 45 insertions(+), 68 deletions(-) diff --git a/syft/cataloger/catalog.go b/syft/cataloger/catalog.go index e1399a381..bda9a1a2e 100644 --- a/syft/cataloger/catalog.go +++ b/syft/cataloger/catalog.go @@ -1,7 +1,6 @@ package cataloger import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/event" @@ -39,35 +38,19 @@ func newMonitor() (*progress.Manual, *progress.Manual) { // request. func Catalog(resolver scope.Resolver, catalogers ...Cataloger) (*pkg.Catalog, error) { catalog := pkg.NewCatalog() - fileSelection := make([]file.Reference, 0) - filesProcessed, packagesDiscovered := newMonitor() - // ask catalogers for files to extract from the image tar - for _, a := range catalogers { - fileSelection = append(fileSelection, a.SelectFiles(resolver)...) - log.Debugf("cataloger '%s' selected '%d' files", a.Name(), len(fileSelection)) - filesProcessed.N += int64(len(fileSelection)) - } - - // fetch contents for requested selection by catalogers - // TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering). - contents, err := resolver.MultipleFileContentsByRef(fileSelection...) - if err != nil { - return nil, err - } - // perform analysis, accumulating errors for each failed analysis var errs error - for _, a := range catalogers { + for _, theCataloger := range catalogers { // TODO: check for multiple rounds of analyses by Iterate error - packages, err := a.Catalog(contents) + packages, err := theCataloger.Catalog(resolver) if err != nil { errs = multierror.Append(errs, err) continue } - log.Debugf("cataloger '%s' discovered '%d' packages", a.Name(), len(packages)) + log.Debugf("cataloger '%s' discovered '%d' packages", theCataloger.Name(), len(packages)) packagesDiscovered.N += int64(len(packages)) for _, p := range packages { diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index 9b6b5cd0a..817a7f8d4 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -6,7 +6,6 @@ catalogers defined in child packages as well as the interface definition to impl package cataloger import ( - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/cataloger/apkdb" "github.com/anchore/syft/syft/cataloger/deb" "github.com/anchore/syft/syft/cataloger/golang" @@ -25,10 +24,8 @@ import ( type Cataloger interface { // Name returns a string that uniquely describes a cataloger Name() string - // SelectFiles discovers and returns specific files that the cataloger would like to inspect the contents of. - SelectFiles(scope.FileResolver) []file.Reference - // Catalog is given the file contents and should return any discovered Packages after analyzing the contents. - Catalog(map[file.Reference]string) ([]pkg.Package, error) + // Catalog is given an object to resolve file references and content, this function should return any discovered Packages after analyzing the catalog source. + Catalog(resolver scope.Resolver) ([]pkg.Package, error) // TODO: add "IterationNeeded" error to indicate to the driver to continue with another Select/Catalog pass // TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering). } diff --git a/syft/cataloger/common/generic_cataloger.go b/syft/cataloger/common/generic_cataloger.go index ed314fcae..ce9c6d885 100644 --- a/syft/cataloger/common/generic_cataloger.go +++ b/syft/cataloger/common/generic_cataloger.go @@ -34,73 +34,82 @@ func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string } // Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents. -func (a *GenericCataloger) Name() string { - return a.upstreamCataloger +func (c *GenericCataloger) Name() string { + return c.upstreamCataloger } // register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved) -func (a *GenericCataloger) register(files []file.Reference, parser ParserFn) { - a.selectedFiles = append(a.selectedFiles, files...) +func (c *GenericCataloger) register(files []file.Reference, parser ParserFn) { + c.selectedFiles = append(c.selectedFiles, files...) for _, f := range files { - a.parsers[f] = parser + c.parsers[f] = parser } } // clear deletes all registered file-reference-to-parser-function pairings from former SelectFiles() and register() calls -func (a *GenericCataloger) clear() { - a.selectedFiles = make([]file.Reference, 0) - a.parsers = make(map[file.Reference]ParserFn) +func (c *GenericCataloger) clear() { + c.selectedFiles = make([]file.Reference, 0) + c.parsers = make(map[file.Reference]ParserFn) +} + +func (c *GenericCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { + fileSelection := c.selectFiles(resolver) + contents, err := resolver.MultipleFileContentsByRef(fileSelection...) + if err != nil { + return nil, err + } + return c.catalog(contents) } // SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging -func (a *GenericCataloger) SelectFiles(resolver scope.FileResolver) []file.Reference { +func (c *GenericCataloger) selectFiles(resolver scope.FileResolver) []file.Reference { // select by exact path - for path, parser := range a.pathParsers { + for path, parser := range c.pathParsers { files, err := resolver.FilesByPath(file.Path(path)) if err != nil { log.Errorf("cataloger failed to select files by path: %+v", err) } if files != nil { - a.register(files, parser) + c.register(files, parser) } } // select by glob pattern - for globPattern, parser := range a.globParsers { + for globPattern, parser := range c.globParsers { fileMatches, err := resolver.FilesByGlob(globPattern) if err != nil { log.Errorf("failed to find files by glob: %s", globPattern) } if fileMatches != nil { - a.register(fileMatches, parser) + c.register(fileMatches, parser) } } - return a.selectedFiles + return c.selectedFiles } // Catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages -func (a *GenericCataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) { - defer a.clear() +func (c *GenericCataloger) catalog(contents map[file.Reference]string) ([]pkg.Package, error) { + defer c.clear() packages := make([]pkg.Package, 0) - for reference, parser := range a.parsers { + for reference, parser := range c.parsers { content, ok := contents[reference] if !ok { - log.Errorf("cataloger '%s' missing file content: %+v", a.upstreamCataloger, reference) + log.Errorf("cataloger '%s' missing file content: %+v", c.upstreamCataloger, reference) continue } entries, err := parser(string(reference.Path), strings.NewReader(content)) if err != nil { // TODO: should we fail? or only log? - log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", a.upstreamCataloger, reference, err) + log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", c.upstreamCataloger, reference, err) continue } for _, entry := range entries { - entry.FoundBy = a.upstreamCataloger + entry.FoundBy = c.upstreamCataloger entry.Source = []file.Reference{reference} packages = append(packages, entry) diff --git a/syft/cataloger/common/generic_cataloger_test.go b/syft/cataloger/common/generic_cataloger_test.go index 57c724f07..0478ed7be 100644 --- a/syft/cataloger/common/generic_cataloger_test.go +++ b/syft/cataloger/common/generic_cataloger_test.go @@ -7,7 +7,6 @@ import ( "testing" "github.com/anchore/stereoscope/pkg/file" - "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/pkg" ) @@ -21,6 +20,10 @@ func newTestResolver() *testResolver { } } +func (r *testResolver) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { + return r.contents, nil +} + func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) { results := make([]file.Reference, len(paths)) @@ -32,7 +35,7 @@ func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) return results, nil } -func (r *testResolver) FilesByGlob(patterns ...string) ([]file.Reference, error) { +func (r *testResolver) FilesByGlob(_ ...string) ([]file.Reference, error) { path := "/a-path.txt" ref := file.NewFileReference(file.Path(path)) r.contents[ref] = fmt.Sprintf("%s file contents!", path) @@ -64,31 +67,16 @@ func TestGenericCataloger(t *testing.T) { resolver := newTestResolver() cataloger := NewGenericCataloger(pathParsers, globParsers, upstream) - selected := cataloger.SelectFiles(resolver) - - if len(selected) != 3 { - t.Fatalf("unexpected selection length: %d", len(selected)) - } - - expectedSelection := internal.NewStringSetFromSlice([]string{"/last/path.txt", "/another-path.txt", "/a-path.txt"}) - selectionByPath := make(map[string]file.Reference) - for _, s := range selected { - if !expectedSelection.Contains(string(s.Path)) { - t.Errorf("unexpected selection path: %+v", s.Path) - } - selectionByPath[string(s.Path)] = s - } - - expectedPkgs := make(map[file.Reference]pkg.Package) - for path, ref := range selectionByPath { - expectedPkgs[ref] = pkg.Package{ + expectedSelection := []string{"/last/path.txt", "/another-path.txt", "/a-path.txt"} + expectedPkgs := make(map[string]pkg.Package) + for _, path := range expectedSelection { + expectedPkgs[path] = pkg.Package{ FoundBy: upstream, - Source: []file.Reference{ref}, Name: fmt.Sprintf("%s file contents!", path), } } - actualPkgs, err := cataloger.Catalog(resolver.contents) + actualPkgs, err := cataloger.Catalog(resolver) if err != nil { t.Fatalf("cataloger catalog action failed: %+v", err) } @@ -99,7 +87,7 @@ func TestGenericCataloger(t *testing.T) { for _, p := range actualPkgs { ref := p.Source[0] - exP, ok := expectedPkgs[ref] + exP, ok := expectedPkgs[string(ref.Path)] if !ok { t.Errorf("missing expected pkg: ref=%+v", ref) continue From d4ca0ab16753e2003242b00b48a81feeb3c98148 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 21 Oct 2020 11:09:43 -0400 Subject: [PATCH 2/2] expand the resolver to include content requests for a single reference Signed-off-by: Alex Goodman --- syft/cataloger/catalog.go | 7 ++++--- syft/cataloger/cataloger.go | 4 +--- syft/cataloger/common/generic_cataloger.go | 3 ++- syft/cataloger/common/generic_cataloger_test.go | 4 ++++ syft/scope/resolver.go | 2 ++ syft/scope/resolvers/all_layers_resolver.go | 6 ++++++ syft/scope/resolvers/directory_resolver.go | 13 ++++++++++++- syft/scope/resolvers/image_squash_resolver.go | 6 ++++++ 8 files changed, 37 insertions(+), 8 deletions(-) diff --git a/syft/cataloger/catalog.go b/syft/cataloger/catalog.go index bda9a1a2e..022f81855 100644 --- a/syft/cataloger/catalog.go +++ b/syft/cataloger/catalog.go @@ -43,15 +43,16 @@ func Catalog(resolver scope.Resolver, catalogers ...Cataloger) (*pkg.Catalog, er // perform analysis, accumulating errors for each failed analysis var errs error for _, theCataloger := range catalogers { - // TODO: check for multiple rounds of analyses by Iterate error packages, err := theCataloger.Catalog(resolver) if err != nil { errs = multierror.Append(errs, err) continue } - log.Debugf("cataloger '%s' discovered '%d' packages", theCataloger.Name(), len(packages)) - packagesDiscovered.N += int64(len(packages)) + catalogedPackages := len(packages) + + log.Debugf("cataloger '%s' discovered '%d' packages", theCataloger.Name(), catalogedPackages) + packagesDiscovered.N += int64(catalogedPackages) for _, p := range packages { catalog.Add(p) diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index 817a7f8d4..e10de959b 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -24,10 +24,8 @@ import ( type Cataloger interface { // Name returns a string that uniquely describes a cataloger Name() string - // Catalog is given an object to resolve file references and content, this function should return any discovered Packages after analyzing the catalog source. + // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. Catalog(resolver scope.Resolver) ([]pkg.Package, error) - // TODO: add "IterationNeeded" error to indicate to the driver to continue with another Select/Catalog pass - // TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering). } // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. diff --git a/syft/cataloger/common/generic_cataloger.go b/syft/cataloger/common/generic_cataloger.go index ce9c6d885..90ca62d06 100644 --- a/syft/cataloger/common/generic_cataloger.go +++ b/syft/cataloger/common/generic_cataloger.go @@ -52,6 +52,7 @@ func (c *GenericCataloger) clear() { c.parsers = make(map[file.Reference]ParserFn) } +// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. func (c *GenericCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { fileSelection := c.selectFiles(resolver) contents, err := resolver.MultipleFileContentsByRef(fileSelection...) @@ -88,7 +89,7 @@ func (c *GenericCataloger) selectFiles(resolver scope.FileResolver) []file.Refer return c.selectedFiles } -// Catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages +// catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages func (c *GenericCataloger) catalog(contents map[file.Reference]string) ([]pkg.Package, error) { defer c.clear() diff --git a/syft/cataloger/common/generic_cataloger_test.go b/syft/cataloger/common/generic_cataloger_test.go index 0478ed7be..39298ec71 100644 --- a/syft/cataloger/common/generic_cataloger_test.go +++ b/syft/cataloger/common/generic_cataloger_test.go @@ -20,6 +20,10 @@ func newTestResolver() *testResolver { } } +func (r *testResolver) FileContentsByRef(_ file.Reference) (string, error) { + return "", fmt.Errorf("not implemented") +} + func (r *testResolver) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { return r.contents, nil } diff --git a/syft/scope/resolver.go b/syft/scope/resolver.go index 4a965d30e..74a20783a 100644 --- a/syft/scope/resolver.go +++ b/syft/scope/resolver.go @@ -16,7 +16,9 @@ type Resolver interface { // ContentResolver knows how to get file content for given file.References type ContentResolver interface { + FileContentsByRef(ref file.Reference) (string, error) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) + // TODO: we should consider refactoring to return a set of io.Readers or file.Openers instead of the full contents themselves (allow for optional buffering). } // FileResolver knows how to get file.References for given string paths and globs diff --git a/syft/scope/resolvers/all_layers_resolver.go b/syft/scope/resolvers/all_layers_resolver.go index e14eae8fd..757d8129f 100644 --- a/syft/scope/resolvers/all_layers_resolver.go +++ b/syft/scope/resolvers/all_layers_resolver.go @@ -114,3 +114,9 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]file.Reference, e func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { return r.img.MultipleFileContentsByRef(f...) } + +// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer. +// If the path does not exist an error is returned. +func (r *AllLayersResolver) FileContentsByRef(ref file.Reference) (string, error) { + return r.img.FileContentsByRef(ref) +} diff --git a/syft/scope/resolvers/directory_resolver.go b/syft/scope/resolvers/directory_resolver.go index b8ed280bc..54db895bc 100644 --- a/syft/scope/resolvers/directory_resolver.go +++ b/syft/scope/resolvers/directory_resolver.go @@ -82,9 +82,20 @@ func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[f contents, err := fileContents(fileRef.Path) if err != nil { - return refContents, fmt.Errorf("could not read contents of file: %s", fileRef.Path) + return nil, fmt.Errorf("could not read contents of file: %s", fileRef.Path) } refContents[fileRef] = string(contents) } return refContents, nil } + +// FileContentsByRef fetches file contents for a single file reference relative to a directory. +// If the path does not exist an error is returned. +func (s DirectoryResolver) FileContentsByRef(ref file.Reference) (string, error) { + contents, err := fileContents(ref.Path) + if err != nil { + return "", fmt.Errorf("could not read contents of file: %s", ref.Path) + } + + return string(contents), nil +} diff --git a/syft/scope/resolvers/image_squash_resolver.go b/syft/scope/resolvers/image_squash_resolver.go index 3df7efb88..429adff34 100644 --- a/syft/scope/resolvers/image_squash_resolver.go +++ b/syft/scope/resolvers/image_squash_resolver.go @@ -78,3 +78,9 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]file.Reference, func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { return r.img.MultipleFileContentsByRef(f...) } + +// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer. +// If the path does not exist an error is returned. +func (r *ImageSquashResolver) FileContentsByRef(ref file.Reference) (string, error) { + return r.img.FileContentsByRef(ref) +}