From fc8b431ea62def8a5729439d34d1bd1a3cc2f2eb Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 5 Jan 2021 13:51:07 -0500 Subject: [PATCH] duplicate reference readers for duplicate location resolutions Signed-off-by: Alex Goodman --- syft/cataloger/deb/cataloger.go | 14 +++++----- syft/presenter/cyclonedx/presenter_test.go | 4 +-- syft/presenter/json/presenter_test.go | 4 +-- syft/presenter/table/presenter_test.go | 4 +-- syft/presenter/text/presenter_test.go | 4 +-- syft/source/all_layers_resolver.go | 32 ++++++++++++++++++---- syft/source/all_layers_resolver_test.go | 8 +++--- syft/source/image_squash_resolver.go | 2 +- syft/source/image_squash_resolver_test.go | 8 +++--- syft/source/location.go | 6 ++-- 10 files changed, 53 insertions(+), 33 deletions(-) diff --git a/syft/cataloger/deb/cataloger.go b/syft/cataloger/deb/cataloger.go index 2c1bf1f7f..3efc3979f 100644 --- a/syft/cataloger/deb/cataloger.go +++ b/syft/cataloger/deb/cataloger.go @@ -56,7 +56,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err) } - copyrightContentsByName, copyrightRefsByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs) + copyrightContentsByName, copyrightLocationByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs) if err != nil { return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err) } @@ -90,7 +90,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { p.Licenses = parseLicensesFromCopyright(copyrightReader) // keep a record of the file where this was discovered - if ref, ok := copyrightRefsByName[p.Name]; ok { + if ref, ok := copyrightLocationByName[p.Name]; ok { p.Locations = append(p.Locations, ref) } } @@ -115,8 +115,8 @@ func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs if md5SumLocation == nil { // the most specific key did not work, fallback to just the name // look for /var/lib/dpkg/info/NAME.md5sums - name := p.Name - md5sumPath := path.Join(parentPath, "info", name+md5sumsExt) + name = p.Name + md5sumPath = path.Join(parentPath, "info", name+md5sumsExt) md5SumLocation = resolver.RelativeFileByPath(dbLocation, md5sumPath) } // we should have at least one reference @@ -134,14 +134,14 @@ func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs // organize content results and refs by a combination of name and architecture var contentsByName = make(map[string]io.Reader) - var refsByName = make(map[string]source.Location) + var locationByName = make(map[string]source.Location) for location, contents := range md5ContentsByLocation { name := nameByRef[location] contentsByName[name] = contents - refsByName[name] = location + locationByName[name] = location } - return contentsByName, refsByName, nil + return contentsByName, locationByName, nil } func fetchCopyrightContents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) { diff --git a/syft/presenter/cyclonedx/presenter_test.go b/syft/presenter/cyclonedx/presenter_test.go index c7909e22f..38a070697 100644 --- a/syft/presenter/cyclonedx/presenter_test.go +++ b/syft/presenter/cyclonedx/presenter_test.go @@ -104,7 +104,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) { Name: "package1", Version: "1.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref1, img), + source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), }, Type: pkg.RpmPkg, FoundBy: "the-cataloger-1", @@ -114,7 +114,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) { Name: "package2", Version: "2.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref2, img), + source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), }, Type: pkg.RpmPkg, FoundBy: "the-cataloger-2", diff --git a/syft/presenter/json/presenter_test.go b/syft/presenter/json/presenter_test.go index 585bad978..c23e70372 100644 --- a/syft/presenter/json/presenter_test.go +++ b/syft/presenter/json/presenter_test.go @@ -116,7 +116,7 @@ func TestJsonImgsPresenter(t *testing.T) { Name: "package-1", Version: "1.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref1, img), + source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), }, Type: pkg.PythonPkg, FoundBy: "the-cataloger-1", @@ -136,7 +136,7 @@ func TestJsonImgsPresenter(t *testing.T) { Name: "package-2", Version: "2.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref2, img), + source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), }, Type: pkg.DebPkg, FoundBy: "the-cataloger-2", diff --git a/syft/presenter/table/presenter_test.go b/syft/presenter/table/presenter_test.go index 481d4261c..fb120494b 100644 --- a/syft/presenter/table/presenter_test.go +++ b/syft/presenter/table/presenter_test.go @@ -35,7 +35,7 @@ func TestTablePresenter(t *testing.T) { Name: "package-1", Version: "1.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref1, img), + source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), }, Type: pkg.DebPkg, }) @@ -43,7 +43,7 @@ func TestTablePresenter(t *testing.T) { Name: "package-2", Version: "2.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref2, img), + source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), }, Type: pkg.DebPkg, }) diff --git a/syft/presenter/text/presenter_test.go b/syft/presenter/text/presenter_test.go index cf92fd3b3..ccac4b3d9 100644 --- a/syft/presenter/text/presenter_test.go +++ b/syft/presenter/text/presenter_test.go @@ -80,7 +80,7 @@ func TestTextImgPresenter(t *testing.T) { Name: "package-1", Version: "1.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref1, img), + source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), }, FoundBy: "dpkg", Type: pkg.DebPkg, @@ -89,7 +89,7 @@ func TestTextImgPresenter(t *testing.T) { Name: "package-2", Version: "2.0.1", Locations: []source.Location{ - source.NewLocationFromImage(*ref2, img), + source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), }, FoundBy: "dpkg", Metadata: PackageInfo{Name: "package-2", Version: "1.0.2"}, diff --git a/syft/source/all_layers_resolver.go b/syft/source/all_layers_resolver.go index f95472a43..f2f248ee2 100644 --- a/syft/source/all_layers_resolver.go +++ b/syft/source/all_layers_resolver.go @@ -2,8 +2,10 @@ package source import ( "archive/tar" + "bytes" "fmt" "io" + "io/ioutil" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" @@ -112,7 +114,7 @@ func (r *AllLayersResolver) FilesByPath(paths ...string) ([]Location, error) { return nil, err } for _, result := range results { - uniqueLocations = append(uniqueLocations, NewLocationFromImage(result, r.img)) + uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, result, r.img)) } } } @@ -151,7 +153,7 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) return nil, err } for _, refResult := range refResults { - uniqueLocations = append(uniqueLocations, NewLocationFromImage(refResult, r.img)) + uniqueLocations = append(uniqueLocations, NewLocationFromImage(string(result.MatchPath), refResult, r.img)) } } } @@ -177,7 +179,7 @@ func (r *AllLayersResolver) RelativeFileByPath(location Location, path string) * return nil } - relativeLocation := NewLocationFromImage(*relativeRef, r.img) + relativeLocation := NewLocationFromImage(path, *relativeRef, r.img) return &relativeLocation } @@ -198,11 +200,11 @@ type multiContentFetcher func(refs ...file.Reference) (map[file.Reference]io.Rea func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Location]io.ReadCloser, error) { var fileRefs = make([]file.Reference, len(locations)) - var locationByRefs = make(map[file.Reference]Location) + var locationByRefs = make(map[file.Reference][]Location) var results = make(map[Location]io.ReadCloser) for i, location := range locations { - locationByRefs[location.ref] = location + locationByRefs[location.ref] = append(locationByRefs[location.ref], location) fileRefs[i] = location.ref } @@ -211,8 +213,26 @@ func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Lo return nil, err } + // TODO: this is not tested, we need a test case that covers a mapLocationRefs which has multiple Locations with the same reference in the request. The io.Reader should be copied. for ref, content := range contentsByRef { - results[locationByRefs[ref]] = content + mappedLocations := locationByRefs[ref] + switch { + case len(mappedLocations) > 1: + // TODO: fixme... this can lead to lots of unexpected memory usage in unusual circumstances (cache is not leveraged for large files). + // stereoscope wont duplicate content requests if the caller asks for the same file multiple times... thats up to the caller + contentsBytes, err := ioutil.ReadAll(content) + if err != nil { + return nil, fmt.Errorf("unable to read ref=%+v :%w", ref, err) + } + for _, loc := range mappedLocations { + results[loc] = ioutil.NopCloser(bytes.NewReader(contentsBytes)) + } + + case len(mappedLocations) == 1: + results[locationByRefs[ref][0]] = content + default: + return nil, fmt.Errorf("unexpected ref-location count=%d for ref=%v", len(mappedLocations), ref) + } } return results, nil } diff --git a/syft/source/all_layers_resolver_test.go b/syft/source/all_layers_resolver_test.go index 5aa3c8390..d180ea3de 100644 --- a/syft/source/all_layers_resolver_test.go +++ b/syft/source/all_layers_resolver_test.go @@ -113,8 +113,8 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) { for idx, actual := range refs { expected := c.resolutions[idx] - if actual.Path != expected.path { - t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, expected.path) + if string(actual.ref.RealPath) != expected.path { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path) } entry, err := img.FileCatalog.Get(actual.ref) @@ -217,8 +217,8 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) { for idx, actual := range refs { expected := c.resolutions[idx] - if actual.Path != expected.path { - t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, expected.path) + if string(actual.ref.RealPath) != expected.path { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path) } entry, err := img.FileCatalog.Get(actual.ref) diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index 30a426deb..137d498b5 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -66,7 +66,7 @@ func (r *ImageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { if resolvedRef != nil && !uniqueFileIDs.Contains(*resolvedRef) { uniqueFileIDs.Add(*resolvedRef) - uniqueLocations = append(uniqueLocations, NewLocationFromImage(*resolvedRef, r.img)) + uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, *resolvedRef, r.img)) } } diff --git a/syft/source/image_squash_resolver_test.go b/syft/source/image_squash_resolver_test.go index a78150f16..93b07caa5 100644 --- a/syft/source/image_squash_resolver_test.go +++ b/syft/source/image_squash_resolver_test.go @@ -102,8 +102,8 @@ func TestImageSquashResolver_FilesByPath(t *testing.T) { actual := refs[0] - if actual.Path != c.resolvePath { - t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.resolvePath) + if string(actual.ref.RealPath) != c.resolvePath { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath) } entry, err := img.FileCatalog.Get(actual.ref) @@ -204,8 +204,8 @@ func TestImageSquashResolver_FilesByGlob(t *testing.T) { actual := refs[0] - if actual.Path != c.resolvePath { - t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.resolvePath) + if string(actual.ref.RealPath) != c.resolvePath { + t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath) } entry, err := img.FileCatalog.Get(actual.ref) diff --git a/syft/source/location.go b/syft/source/location.go index b4aae7500..0bf8e7978 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -22,18 +22,18 @@ func NewLocation(path string) Location { } // NewLocationFromImage creates a new Location representing the given path (extracted from the ref) relative to the given image. -func NewLocationFromImage(ref file.Reference, img *image.Image) Location { +func NewLocationFromImage(path string, ref file.Reference, img *image.Image) Location { entry, err := img.FileCatalog.Get(ref) if err != nil { log.Warnf("unable to find file catalog entry for ref=%+v", ref) return Location{ - Path: string(ref.RealPath), + Path: path, ref: ref, } } return Location{ - Path: string(ref.RealPath), + Path: path, FileSystemID: entry.Layer.Metadata.Digest, ref: ref, }