Merge pull request #232 from anchore/cataloger-interface-upgrade

Upgrade cataloger interface to use full resolver
This commit is contained in:
Alex Goodman 2020-10-21 11:47:52 -04:00 committed by GitHub
commit beb6afff36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 80 additions and 74 deletions

View File

@ -1,7 +1,6 @@
package cataloger
import (
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/event"
@ -39,36 +38,21 @@ func newMonitor() (*progress.Manual, *progress.Manual) {
// request.
func Catalog(resolver scope.Resolver, catalogers ...Cataloger) (*pkg.Catalog, error) {
catalog := pkg.NewCatalog()
fileSelection := make([]file.Reference, 0)
filesProcessed, packagesDiscovered := newMonitor()
// ask catalogers for files to extract from the image tar
for _, a := range catalogers {
fileSelection = append(fileSelection, a.SelectFiles(resolver)...)
log.Debugf("cataloger '%s' selected '%d' files", a.Name(), len(fileSelection))
filesProcessed.N += int64(len(fileSelection))
}
// fetch contents for requested selection by catalogers
// TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering).
contents, err := resolver.MultipleFileContentsByRef(fileSelection...)
if err != nil {
return nil, err
}
// perform analysis, accumulating errors for each failed analysis
var errs error
for _, a := range catalogers {
// TODO: check for multiple rounds of analyses by Iterate error
packages, err := a.Catalog(contents)
for _, theCataloger := range catalogers {
packages, err := theCataloger.Catalog(resolver)
if err != nil {
errs = multierror.Append(errs, err)
continue
}
log.Debugf("cataloger '%s' discovered '%d' packages", a.Name(), len(packages))
packagesDiscovered.N += int64(len(packages))
catalogedPackages := len(packages)
log.Debugf("cataloger '%s' discovered '%d' packages", theCataloger.Name(), catalogedPackages)
packagesDiscovered.N += int64(catalogedPackages)
for _, p := range packages {
catalog.Add(p)

View File

@ -6,7 +6,6 @@ catalogers defined in child packages as well as the interface definition to impl
package cataloger
import (
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/cataloger/apkdb"
"github.com/anchore/syft/syft/cataloger/deb"
"github.com/anchore/syft/syft/cataloger/golang"
@ -25,12 +24,8 @@ import (
type Cataloger interface {
// Name returns a string that uniquely describes a cataloger
Name() string
// SelectFiles discovers and returns specific files that the cataloger would like to inspect the contents of.
SelectFiles(scope.FileResolver) []file.Reference
// Catalog is given the file contents and should return any discovered Packages after analyzing the contents.
Catalog(map[file.Reference]string) ([]pkg.Package, error)
// TODO: add "IterationNeeded" error to indicate to the driver to continue with another Select/Catalog pass
// TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering).
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
Catalog(resolver scope.Resolver) ([]pkg.Package, error)
}
// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages.

View File

@ -34,73 +34,83 @@ func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string
}
// Name returns a string that uniquely describes the upstream cataloger that this Generic Cataloger represents.
func (a *GenericCataloger) Name() string {
return a.upstreamCataloger
func (c *GenericCataloger) Name() string {
return c.upstreamCataloger
}
// register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved)
func (a *GenericCataloger) register(files []file.Reference, parser ParserFn) {
a.selectedFiles = append(a.selectedFiles, files...)
func (c *GenericCataloger) register(files []file.Reference, parser ParserFn) {
c.selectedFiles = append(c.selectedFiles, files...)
for _, f := range files {
a.parsers[f] = parser
c.parsers[f] = parser
}
}
// clear deletes all registered file-reference-to-parser-function pairings from former SelectFiles() and register() calls
func (a *GenericCataloger) clear() {
a.selectedFiles = make([]file.Reference, 0)
a.parsers = make(map[file.Reference]ParserFn)
func (c *GenericCataloger) clear() {
c.selectedFiles = make([]file.Reference, 0)
c.parsers = make(map[file.Reference]ParserFn)
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *GenericCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) {
fileSelection := c.selectFiles(resolver)
contents, err := resolver.MultipleFileContentsByRef(fileSelection...)
if err != nil {
return nil, err
}
return c.catalog(contents)
}
// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
func (a *GenericCataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
func (c *GenericCataloger) selectFiles(resolver scope.FileResolver) []file.Reference {
// select by exact path
for path, parser := range a.pathParsers {
for path, parser := range c.pathParsers {
files, err := resolver.FilesByPath(file.Path(path))
if err != nil {
log.Errorf("cataloger failed to select files by path: %+v", err)
}
if files != nil {
a.register(files, parser)
c.register(files, parser)
}
}
// select by glob pattern
for globPattern, parser := range a.globParsers {
for globPattern, parser := range c.globParsers {
fileMatches, err := resolver.FilesByGlob(globPattern)
if err != nil {
log.Errorf("failed to find files by glob: %s", globPattern)
}
if fileMatches != nil {
a.register(fileMatches, parser)
c.register(fileMatches, parser)
}
}
return a.selectedFiles
return c.selectedFiles
}
// Catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages
func (a *GenericCataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
defer a.clear()
// catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages
func (c *GenericCataloger) catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
defer c.clear()
packages := make([]pkg.Package, 0)
for reference, parser := range a.parsers {
for reference, parser := range c.parsers {
content, ok := contents[reference]
if !ok {
log.Errorf("cataloger '%s' missing file content: %+v", a.upstreamCataloger, reference)
log.Errorf("cataloger '%s' missing file content: %+v", c.upstreamCataloger, reference)
continue
}
entries, err := parser(string(reference.Path), strings.NewReader(content))
if err != nil {
// TODO: should we fail? or only log?
log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", a.upstreamCataloger, reference, err)
log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %+v", c.upstreamCataloger, reference, err)
continue
}
for _, entry := range entries {
entry.FoundBy = a.upstreamCataloger
entry.FoundBy = c.upstreamCataloger
entry.Source = []file.Reference{reference}
packages = append(packages, entry)

View File

@ -7,7 +7,6 @@ import (
"testing"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
)
@ -21,6 +20,14 @@ func newTestResolver() *testResolver {
}
}
func (r *testResolver) FileContentsByRef(_ file.Reference) (string, error) {
return "", fmt.Errorf("not implemented")
}
func (r *testResolver) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) {
return r.contents, nil
}
func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) {
results := make([]file.Reference, len(paths))
@ -32,7 +39,7 @@ func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error)
return results, nil
}
func (r *testResolver) FilesByGlob(patterns ...string) ([]file.Reference, error) {
func (r *testResolver) FilesByGlob(_ ...string) ([]file.Reference, error) {
path := "/a-path.txt"
ref := file.NewFileReference(file.Path(path))
r.contents[ref] = fmt.Sprintf("%s file contents!", path)
@ -64,31 +71,16 @@ func TestGenericCataloger(t *testing.T) {
resolver := newTestResolver()
cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)
selected := cataloger.SelectFiles(resolver)
if len(selected) != 3 {
t.Fatalf("unexpected selection length: %d", len(selected))
}
expectedSelection := internal.NewStringSetFromSlice([]string{"/last/path.txt", "/another-path.txt", "/a-path.txt"})
selectionByPath := make(map[string]file.Reference)
for _, s := range selected {
if !expectedSelection.Contains(string(s.Path)) {
t.Errorf("unexpected selection path: %+v", s.Path)
}
selectionByPath[string(s.Path)] = s
}
expectedPkgs := make(map[file.Reference]pkg.Package)
for path, ref := range selectionByPath {
expectedPkgs[ref] = pkg.Package{
expectedSelection := []string{"/last/path.txt", "/another-path.txt", "/a-path.txt"}
expectedPkgs := make(map[string]pkg.Package)
for _, path := range expectedSelection {
expectedPkgs[path] = pkg.Package{
FoundBy: upstream,
Source: []file.Reference{ref},
Name: fmt.Sprintf("%s file contents!", path),
}
}
actualPkgs, err := cataloger.Catalog(resolver.contents)
actualPkgs, err := cataloger.Catalog(resolver)
if err != nil {
t.Fatalf("cataloger catalog action failed: %+v", err)
}
@ -99,7 +91,7 @@ func TestGenericCataloger(t *testing.T) {
for _, p := range actualPkgs {
ref := p.Source[0]
exP, ok := expectedPkgs[ref]
exP, ok := expectedPkgs[string(ref.Path)]
if !ok {
t.Errorf("missing expected pkg: ref=%+v", ref)
continue

View File

@ -16,7 +16,9 @@ type Resolver interface {
// ContentResolver knows how to get file content for given file.References
type ContentResolver interface {
FileContentsByRef(ref file.Reference) (string, error)
MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error)
// TODO: we should consider refactoring to return a set of io.Readers or file.Openers instead of the full contents themselves (allow for optional buffering).
}
// FileResolver knows how to get file.References for given string paths and globs

View File

@ -114,3 +114,9 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]file.Reference, e
func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
return r.img.MultipleFileContentsByRef(f...)
}
// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer.
// If the path does not exist an error is returned.
func (r *AllLayersResolver) FileContentsByRef(ref file.Reference) (string, error) {
return r.img.FileContentsByRef(ref)
}

View File

@ -82,9 +82,20 @@ func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[f
contents, err := fileContents(fileRef.Path)
if err != nil {
return refContents, fmt.Errorf("could not read contents of file: %s", fileRef.Path)
return nil, fmt.Errorf("could not read contents of file: %s", fileRef.Path)
}
refContents[fileRef] = string(contents)
}
return refContents, nil
}
// FileContentsByRef fetches file contents for a single file reference relative to a directory.
// If the path does not exist an error is returned.
func (s DirectoryResolver) FileContentsByRef(ref file.Reference) (string, error) {
contents, err := fileContents(ref.Path)
if err != nil {
return "", fmt.Errorf("could not read contents of file: %s", ref.Path)
}
return string(contents), nil
}

View File

@ -78,3 +78,9 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]file.Reference,
func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
return r.img.MultipleFileContentsByRef(f...)
}
// FileContentsByRef fetches file contents for a single file reference, irregardless of the source layer.
// If the path does not exist an error is returned.
func (r *ImageSquashResolver) FileContentsByRef(ref file.Reference) (string, error) {
return r.img.FileContentsByRef(ref)
}