refactor catalogers to consume io.Readers instead of strings

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-08-31 17:41:05 -04:00
parent df7c83c9b0
commit 63ee5ba098
No known key found for this signature in database
GPG Key ID: 86E2870463D5E890
18 changed files with 93 additions and 81 deletions

3
go.mod
View File

@ -21,6 +21,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/mapstructure v1.3.1
github.com/olekukonko/tablewriter v0.0.4
github.com/opencontainers/runc v0.1.1 // indirect
github.com/package-url/packageurl-go v0.1.0
github.com/pelletier/go-toml v1.8.0
github.com/rogpeppe/go-internal v1.5.2
@ -41,3 +42,5 @@ require (
gopkg.in/ini.v1 v1.57.0 // indirect
gopkg.in/yaml.v2 v2.3.0
)
replace github.com/anchore/stereoscope => ../stereoscope

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.ApkPkg Package Types defined in Alpine DB files.
@ -27,16 +28,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "apkdb-cataloger"
}
// SelectFiles returns a set of discovered Alpine DB files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all Alpine DB files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.GemPkg Package Types defined in Bundler Gemfile.lock files.
@ -27,16 +28,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "bundler-cataloger"
}
// SelectFiles returns a set of discovered Gemfile.lock files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all Gemfile.lock files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -59,18 +59,20 @@ func Catalog(resolver scope.Resolver, catalogers ...Cataloger) (*pkg.Catalog, er
// perform analysis, accumulating errors for each failed analysis
var errs error
for _, a := range catalogers {
for _, c := range catalogers {
// TODO: check for multiple rounds of analyses by Iterate error
packages, err := a.Catalog(contents)
packages, err := c.Catalog(contents)
if err != nil {
errs = multierror.Append(errs, err)
continue
}
log.Debugf("cataloger '%s' discovered '%d' packages", a.Name(), len(packages))
log.Debugf("cataloger '%s' discovered '%d' packages", c.Name(), len(packages))
packagesDiscovered.N += int64(len(packages))
catalogerName := c.Name()
for _, p := range packages {
p.FoundBy = catalogerName
catalog.Add(p)
}
}

View File

@ -17,6 +17,7 @@ import (
"github.com/anchore/syft/syft/cataloger/rpmdb"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger describes behavior for an object to participate in parsing container image or file system
@ -28,9 +29,8 @@ type Cataloger interface {
// SelectFiles discovers and returns specific files that the cataloger would like to inspect the contents of.
SelectFiles(scope.FileResolver) []file.Reference
// Catalog is given the file contents and should return any discovered Packages after analyzing the contents.
Catalog(map[file.Reference]string) ([]pkg.Package, error)
Catalog(map[file.Reference]io.Reader) ([]pkg.Package, error)
// TODO: add "IterationNeeded" error to indicate to the driver to continue with another Select/Catalog pass
// TODO: we should consider refactoring to return a set of io.Readers instead of the full contents themselves (allow for optional buffering).
}
// All returns a slice of all locally defined catalogers (defined in child packages).

View File

@ -4,12 +4,11 @@ Package common provides generic utilities used by multiple catalogers.
package common
import (
"strings"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// GenericCataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
@ -73,7 +72,7 @@ func (a *GenericCataloger) SelectFiles(resolver scope.FileResolver) []file.Refer
}
// Catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages
func (a *GenericCataloger) Catalog(contents map[file.Reference]string, upstreamMatcher string) ([]pkg.Package, error) {
func (a *GenericCataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
defer a.clear()
packages := make([]pkg.Package, 0)
@ -81,19 +80,18 @@ func (a *GenericCataloger) Catalog(contents map[file.Reference]string, upstreamM
for reference, parser := range a.parsers {
content, ok := contents[reference]
if !ok {
log.Errorf("cataloger '%s' missing file content: %+v", upstreamMatcher, reference)
log.Errorf("cataloger missing file content: %+v", reference)
continue
}
entries, err := parser(string(reference.Path), strings.NewReader(content))
entries, err := parser(string(reference.Path), content)
if err != nil {
// TODO: should we fail? or only log?
log.Errorf("cataloger '%s' failed to parse entries (reference=%+v): %w", upstreamMatcher, reference, err)
log.Errorf("cataloger failed to parse entries (reference=%+v): %w", reference, err)
continue
}
for _, entry := range entries {
entry.FoundBy = upstreamMatcher
entry.Source = []file.Reference{reference}
packages = append(packages, entry)

View File

@ -79,17 +79,15 @@ func TestGenericCataloger(t *testing.T) {
selectionByPath[string(s.Path)] = s
}
upstream := "some-other-cataloger"
expectedPkgs := make(map[file.Reference]pkg.Package)
for path, ref := range selectionByPath {
expectedPkgs[ref] = pkg.Package{
FoundBy: upstream,
Source: []file.Reference{ref},
Name: fmt.Sprintf("%s file contents!", path),
Source: []file.Reference{ref},
Name: fmt.Sprintf("%s file contents!", path),
}
}
actualPkgs, err := cataloger.Catalog(resolver.contents, upstream)
actualPkgs, err := cataloger.Catalog(resolver.contents)
if err != nil {
t.Fatalf("cataloger catalog action failed: %+v", err)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.DebPkg Package Types defined in DPKG status files.
@ -27,16 +28,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "dpkg-cataloger"
}
// SelectFiles returns a set of discovered DPKG status files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all DPKG status files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.GoModulePkg Package Types defined in go.mod files.
@ -27,16 +28,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "go-cataloger"
}
// SelectFiles returns a set of discovered go.mod files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all go.mod files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.JavaPkg and pkg.JenkinsPluginPkg Package Types defined in java archive files.
@ -28,16 +29,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "java-cataloger"
}
// SelectFiles returns a set of discovered Java archive files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all Java archive files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.YarnPkg and pkg.NpmPkg Package Types defined in package-lock.json and yarn.lock files.
@ -28,16 +29,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "javascript-cataloger"
}
// SelectFiles returns a set of discovered Javascript ecosystem files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all Javascript ecosystem files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.WheelPkg, pkg.EggPkg, and pkg.PythonRequirementsPkg Package Types defined in Python ecosystem files.
@ -31,16 +32,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "python-cataloger"
}
// SelectFiles returns a set of discovered Python ecosystem files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all Python ecosystem files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/scope"
"io"
)
// Cataloger catalogs pkg.RpmPkg Package Types defined in RPM DB files.
@ -27,16 +28,16 @@ func New() *Cataloger {
}
// Name returns a string that uniquely describes this cataloger.
func (a *Cataloger) Name() string {
func (c *Cataloger) Name() string {
return "rpmdb-cataloger"
}
// SelectFiles returns a set of discovered RPM DB files from the user content source.
func (a *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return a.cataloger.SelectFiles(resolver)
func (c *Cataloger) SelectFiles(resolver scope.FileResolver) []file.Reference {
return c.cataloger.SelectFiles(resolver)
}
// Catalog returns the Packages indexed from all RPM DB files discovered.
func (a *Cataloger) Catalog(contents map[file.Reference]string) ([]pkg.Package, error) {
return a.cataloger.Catalog(contents, a.Name())
func (c *Cataloger) Catalog(contents map[file.Reference]io.Reader) ([]pkg.Package, error) {
return c.cataloger.Catalog(contents)
}

View File

@ -1,6 +1,7 @@
package distro
import (
"io/ioutil"
"regexp"
"strings"
@ -10,6 +11,7 @@ import (
)
// returns a distro or nil
// TODO: refactor to io.Reader
type parseFunc func(string) *Distro
type parseEntry struct {
@ -52,8 +54,8 @@ identifyLoop:
}
for _, ref := range refs {
contents, err := resolver.MultipleFileContentsByRef(ref)
content, ok := contents[ref]
contentReaders, err := resolver.MultipleFileContentsByRef(ref)
contentReader, ok := contentReaders[ref]
if !ok {
log.Infof("no content present for ref: %s", ref)
@ -65,6 +67,14 @@ identifyLoop:
continue
}
bytes, err := ioutil.ReadAll(contentReader)
if err != nil {
log.Debugf("unable to read contents from %s: %s", entry.path, err)
continue
}
content := string(bytes)
if content == "" {
log.Debugf("no contents in file, skipping: %s", entry.path)
continue

View File

@ -2,6 +2,7 @@ package scope
import (
"fmt"
"io"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/image"
@ -16,7 +17,7 @@ type Resolver interface {
// ContentResolver knows how to get file content for given file.References
type ContentResolver interface {
MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error)
MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]io.Reader, error)
}
// FileResolver knows how to get file.References for given string paths and globs

View File

@ -3,6 +3,7 @@ package resolvers
import (
"archive/tar"
"fmt"
"io"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/image"
@ -111,6 +112,6 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]file.Reference, e
// MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a
// file.Reference is a path relative to a particular layer.
func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]io.Reader, error) {
return r.img.MultipleFileContentsByRef(f...)
}

View File

@ -2,13 +2,12 @@ package resolvers
import (
"fmt"
"io/ioutil"
"os"
"path"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
"github.com/bmatcuk/doublestar"
"io"
"os"
"path"
)
// DirectoryResolver implements path and content access for the directory data source.
@ -40,15 +39,6 @@ func (s DirectoryResolver) FilesByPath(userPaths ...file.Path) ([]file.Reference
return references, nil
}
func fileContents(path file.Path) ([]byte, error) {
contents, err := ioutil.ReadFile(string(path))
if err != nil {
return nil, err
}
return contents, nil
}
// FilesByGlob returns all file.References that match the given path glob pattern from the directory data source.
func (s DirectoryResolver) FilesByGlob(patterns ...string) ([]file.Reference, error) {
result := make([]file.Reference, 0)
@ -76,15 +66,15 @@ func (s DirectoryResolver) FilesByGlob(patterns ...string) ([]file.Reference, er
}
// MultipleFileContentsByRef returns the file contents for all file.References relative to a directory.
func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
refContents := make(map[file.Reference]string)
func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]io.Reader, error) {
refContents := make(map[file.Reference]io.Reader)
for _, fileRef := range f {
contents, err := fileContents(fileRef.Path)
targetFile, err := os.Open(string(fileRef.Path))
if err != nil {
return refContents, fmt.Errorf("could not read contents of file: %s", fileRef.Path)
return refContents, fmt.Errorf("could not open file=%q: %w", fileRef.Path, err)
}
refContents[fileRef] = string(contents)
refContents[fileRef] = targetFile
}
return refContents, nil
}

View File

@ -2,6 +2,7 @@ package resolvers
import (
"fmt"
"io"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/image"
@ -75,6 +76,6 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]file.Reference,
// MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a
// file.Reference is a path relative to a particular layer, in this case only from the squashed representation.
func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]io.Reader, error) {
return r.img.MultipleFileContentsByRef(f...)
}