Merge pull request #307 from anchore/dup-readers-on-bulk-fetch

Duplicate reference readers for duplicate location resolutions
This commit is contained in:
Alex Goodman 2021-01-05 14:12:42 -05:00 committed by GitHub
commit 0030880e74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 53 additions and 33 deletions

View File

@ -56,7 +56,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err) return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err)
} }
copyrightContentsByName, copyrightRefsByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs) copyrightContentsByName, copyrightLocationByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err) return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err)
} }
@ -90,7 +90,7 @@ func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
p.Licenses = parseLicensesFromCopyright(copyrightReader) p.Licenses = parseLicensesFromCopyright(copyrightReader)
// keep a record of the file where this was discovered // keep a record of the file where this was discovered
if ref, ok := copyrightRefsByName[p.Name]; ok { if ref, ok := copyrightLocationByName[p.Name]; ok {
p.Locations = append(p.Locations, ref) p.Locations = append(p.Locations, ref)
} }
} }
@ -115,8 +115,8 @@ func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs
if md5SumLocation == nil { if md5SumLocation == nil {
// the most specific key did not work, fallback to just the name // the most specific key did not work, fallback to just the name
// look for /var/lib/dpkg/info/NAME.md5sums // look for /var/lib/dpkg/info/NAME.md5sums
name := p.Name name = p.Name
md5sumPath := path.Join(parentPath, "info", name+md5sumsExt) md5sumPath = path.Join(parentPath, "info", name+md5sumsExt)
md5SumLocation = resolver.RelativeFileByPath(dbLocation, md5sumPath) md5SumLocation = resolver.RelativeFileByPath(dbLocation, md5sumPath)
} }
// we should have at least one reference // we should have at least one reference
@ -134,14 +134,14 @@ func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs
// organize content results and refs by a combination of name and architecture // organize content results and refs by a combination of name and architecture
var contentsByName = make(map[string]io.Reader) var contentsByName = make(map[string]io.Reader)
var refsByName = make(map[string]source.Location) var locationByName = make(map[string]source.Location)
for location, contents := range md5ContentsByLocation { for location, contents := range md5ContentsByLocation {
name := nameByRef[location] name := nameByRef[location]
contentsByName[name] = contents contentsByName[name] = contents
refsByName[name] = location locationByName[name] = location
} }
return contentsByName, refsByName, nil return contentsByName, locationByName, nil
} }
func fetchCopyrightContents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) { func fetchCopyrightContents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) {

View File

@ -104,7 +104,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) {
Name: "package1", Name: "package1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref1, img), source.NewLocationFromImage(string(ref1.RealPath), *ref1, img),
}, },
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
FoundBy: "the-cataloger-1", FoundBy: "the-cataloger-1",
@ -114,7 +114,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) {
Name: "package2", Name: "package2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref2, img), source.NewLocationFromImage(string(ref2.RealPath), *ref2, img),
}, },
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
FoundBy: "the-cataloger-2", FoundBy: "the-cataloger-2",

View File

@ -116,7 +116,7 @@ func TestJsonImgsPresenter(t *testing.T) {
Name: "package-1", Name: "package-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref1, img), source.NewLocationFromImage(string(ref1.RealPath), *ref1, img),
}, },
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
FoundBy: "the-cataloger-1", FoundBy: "the-cataloger-1",
@ -136,7 +136,7 @@ func TestJsonImgsPresenter(t *testing.T) {
Name: "package-2", Name: "package-2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref2, img), source.NewLocationFromImage(string(ref2.RealPath), *ref2, img),
}, },
Type: pkg.DebPkg, Type: pkg.DebPkg,
FoundBy: "the-cataloger-2", FoundBy: "the-cataloger-2",

View File

@ -35,7 +35,7 @@ func TestTablePresenter(t *testing.T) {
Name: "package-1", Name: "package-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref1, img), source.NewLocationFromImage(string(ref1.RealPath), *ref1, img),
}, },
Type: pkg.DebPkg, Type: pkg.DebPkg,
}) })
@ -43,7 +43,7 @@ func TestTablePresenter(t *testing.T) {
Name: "package-2", Name: "package-2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref2, img), source.NewLocationFromImage(string(ref2.RealPath), *ref2, img),
}, },
Type: pkg.DebPkg, Type: pkg.DebPkg,
}) })

View File

@ -80,7 +80,7 @@ func TestTextImgPresenter(t *testing.T) {
Name: "package-1", Name: "package-1",
Version: "1.0.1", Version: "1.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref1, img), source.NewLocationFromImage(string(ref1.RealPath), *ref1, img),
}, },
FoundBy: "dpkg", FoundBy: "dpkg",
Type: pkg.DebPkg, Type: pkg.DebPkg,
@ -89,7 +89,7 @@ func TestTextImgPresenter(t *testing.T) {
Name: "package-2", Name: "package-2",
Version: "2.0.1", Version: "2.0.1",
Locations: []source.Location{ Locations: []source.Location{
source.NewLocationFromImage(*ref2, img), source.NewLocationFromImage(string(ref2.RealPath), *ref2, img),
}, },
FoundBy: "dpkg", FoundBy: "dpkg",
Metadata: PackageInfo{Name: "package-2", Version: "1.0.2"}, Metadata: PackageInfo{Name: "package-2", Version: "1.0.2"},

View File

@ -2,8 +2,10 @@ package source
import ( import (
"archive/tar" "archive/tar"
"bytes"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/filetree" "github.com/anchore/stereoscope/pkg/filetree"
@ -112,7 +114,7 @@ func (r *AllLayersResolver) FilesByPath(paths ...string) ([]Location, error) {
return nil, err return nil, err
} }
for _, result := range results { for _, result := range results {
uniqueLocations = append(uniqueLocations, NewLocationFromImage(result, r.img)) uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, result, r.img))
} }
} }
} }
@ -151,7 +153,7 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error)
return nil, err return nil, err
} }
for _, refResult := range refResults { for _, refResult := range refResults {
uniqueLocations = append(uniqueLocations, NewLocationFromImage(refResult, r.img)) uniqueLocations = append(uniqueLocations, NewLocationFromImage(string(result.MatchPath), refResult, r.img))
} }
} }
} }
@ -177,7 +179,7 @@ func (r *AllLayersResolver) RelativeFileByPath(location Location, path string) *
return nil return nil
} }
relativeLocation := NewLocationFromImage(*relativeRef, r.img) relativeLocation := NewLocationFromImage(path, *relativeRef, r.img)
return &relativeLocation return &relativeLocation
} }
@ -198,11 +200,11 @@ type multiContentFetcher func(refs ...file.Reference) (map[file.Reference]io.Rea
func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Location]io.ReadCloser, error) { func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Location]io.ReadCloser, error) {
var fileRefs = make([]file.Reference, len(locations)) var fileRefs = make([]file.Reference, len(locations))
var locationByRefs = make(map[file.Reference]Location) var locationByRefs = make(map[file.Reference][]Location)
var results = make(map[Location]io.ReadCloser) var results = make(map[Location]io.ReadCloser)
for i, location := range locations { for i, location := range locations {
locationByRefs[location.ref] = location locationByRefs[location.ref] = append(locationByRefs[location.ref], location)
fileRefs[i] = location.ref fileRefs[i] = location.ref
} }
@ -211,8 +213,26 @@ func mapLocationRefs(callback multiContentFetcher, locations []Location) (map[Lo
return nil, err return nil, err
} }
// TODO: this is not tested, we need a test case that covers a mapLocationRefs which has multiple Locations with the same reference in the request. The io.Reader should be copied.
for ref, content := range contentsByRef { for ref, content := range contentsByRef {
results[locationByRefs[ref]] = content mappedLocations := locationByRefs[ref]
switch {
case len(mappedLocations) > 1:
// TODO: fixme... this can lead to lots of unexpected memory usage in unusual circumstances (cache is not leveraged for large files).
// stereoscope wont duplicate content requests if the caller asks for the same file multiple times... thats up to the caller
contentsBytes, err := ioutil.ReadAll(content)
if err != nil {
return nil, fmt.Errorf("unable to read ref=%+v :%w", ref, err)
}
for _, loc := range mappedLocations {
results[loc] = ioutil.NopCloser(bytes.NewReader(contentsBytes))
}
case len(mappedLocations) == 1:
results[locationByRefs[ref][0]] = content
default:
return nil, fmt.Errorf("unexpected ref-location count=%d for ref=%v", len(mappedLocations), ref)
}
} }
return results, nil return results, nil
} }

View File

@ -113,8 +113,8 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) {
for idx, actual := range refs { for idx, actual := range refs {
expected := c.resolutions[idx] expected := c.resolutions[idx]
if actual.Path != expected.path { if string(actual.ref.RealPath) != expected.path {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, expected.path) t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path)
} }
entry, err := img.FileCatalog.Get(actual.ref) entry, err := img.FileCatalog.Get(actual.ref)
@ -217,8 +217,8 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) {
for idx, actual := range refs { for idx, actual := range refs {
expected := c.resolutions[idx] expected := c.resolutions[idx]
if actual.Path != expected.path { if string(actual.ref.RealPath) != expected.path {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, expected.path) t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), expected.path)
} }
entry, err := img.FileCatalog.Get(actual.ref) entry, err := img.FileCatalog.Get(actual.ref)

View File

@ -66,7 +66,7 @@ func (r *ImageSquashResolver) FilesByPath(paths ...string) ([]Location, error) {
if resolvedRef != nil && !uniqueFileIDs.Contains(*resolvedRef) { if resolvedRef != nil && !uniqueFileIDs.Contains(*resolvedRef) {
uniqueFileIDs.Add(*resolvedRef) uniqueFileIDs.Add(*resolvedRef)
uniqueLocations = append(uniqueLocations, NewLocationFromImage(*resolvedRef, r.img)) uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, *resolvedRef, r.img))
} }
} }

View File

@ -102,8 +102,8 @@ func TestImageSquashResolver_FilesByPath(t *testing.T) {
actual := refs[0] actual := refs[0]
if actual.Path != c.resolvePath { if string(actual.ref.RealPath) != c.resolvePath {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.resolvePath) t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath)
} }
entry, err := img.FileCatalog.Get(actual.ref) entry, err := img.FileCatalog.Get(actual.ref)
@ -204,8 +204,8 @@ func TestImageSquashResolver_FilesByGlob(t *testing.T) {
actual := refs[0] actual := refs[0]
if actual.Path != c.resolvePath { if string(actual.ref.RealPath) != c.resolvePath {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.resolvePath) t.Errorf("bad resolve path: '%s'!='%s'", string(actual.ref.RealPath), c.resolvePath)
} }
entry, err := img.FileCatalog.Get(actual.ref) entry, err := img.FileCatalog.Get(actual.ref)

View File

@ -22,18 +22,18 @@ func NewLocation(path string) Location {
} }
// NewLocationFromImage creates a new Location representing the given path (extracted from the ref) relative to the given image. // NewLocationFromImage creates a new Location representing the given path (extracted from the ref) relative to the given image.
func NewLocationFromImage(ref file.Reference, img *image.Image) Location { func NewLocationFromImage(path string, ref file.Reference, img *image.Image) Location {
entry, err := img.FileCatalog.Get(ref) entry, err := img.FileCatalog.Get(ref)
if err != nil { if err != nil {
log.Warnf("unable to find file catalog entry for ref=%+v", ref) log.Warnf("unable to find file catalog entry for ref=%+v", ref)
return Location{ return Location{
Path: string(ref.RealPath), Path: path,
ref: ref, ref: ref,
} }
} }
return Location{ return Location{
Path: string(ref.RealPath), Path: path,
FileSystemID: entry.Layer.Metadata.Digest, FileSystemID: entry.Layer.Metadata.Digest,
ref: ref, ref: ref,
} }