add content requester and refactor python cataloger to use it

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-12-11 17:10:45 -05:00
parent 82c8a8e17b
commit e4a3e433b6
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
4 changed files with 132 additions and 73 deletions

View File

@ -18,6 +18,12 @@ const (
wheelMetadataGlob = "**/*dist-info/METADATA"
)
// pythonPackageData groups the files that describe a single python package: the primary metadata file
// plus the RECORD and top_level.txt files resolved relative to it (either of which may be absent).
type pythonPackageData struct {
// Metadata is the egg/wheel metadata file matched by the cataloger globs; it is always set.
Metadata source.FileData
// FileRecord is the RECORD file for the package; nil when no RECORD location was resolved.
FileRecord *source.FileData
// TopPackage is the top_level.txt file for the package; nil when no top_level.txt location was resolved.
TopPackage *source.FileData
}
type PackageCataloger struct{}
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
@ -32,55 +38,43 @@ func (c *PackageCataloger) Name() string {
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
// nolint:prealloc
var fileMatches []source.Location
for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} {
matches, err := resolver.FilesByGlob(glob)
entries, err := c.getPythonPackageEntries(resolver)
if err != nil {
return nil, fmt.Errorf("failed to find files by glob: %s", glob)
}
fileMatches = append(fileMatches, matches...)
}
request, entries := filesOfInterest(resolver, fileMatches)
if err := getContents(resolver, request); err != nil {
return nil, err
}
var pkgs []pkg.Package
var packages []pkg.Package
for _, entry := range entries {
p, err := c.catalogEggOrWheel(entry)
if err != nil {
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
}
if p != nil {
pkgs = append(pkgs, *p)
packages = append(packages, *p)
}
}
return pkgs, nil
return packages, nil
}
type FileData struct {
Location source.Location
Contents string
func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([]*pythonPackageData, error) {
var metadataLocations []source.Location
// find all primary record paths
matches, err := resolver.FilesByGlob(eggMetadataGlob, wheelMetadataGlob)
if err != nil {
return nil, fmt.Errorf("failed to find files by glob: %w", err)
}
metadataLocations = append(metadataLocations, matches...)
type pythonEntry struct {
Metadata FileData
FileRecord *FileData
TopPackage *FileData
}
func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) {
var request = make(map[source.Location]*FileData)
var entries []*pythonEntry
for _, metadataLocation := range metadataLocations {
// for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record
var requester = source.NewContentRequester()
var entries = make([]*pythonPackageData, len(metadataLocations))
for i, metadataLocation := range metadataLocations {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer). The same is true with
// the top_level.txt file.
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
@ -91,52 +85,39 @@ func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Lo
topLevelPath := filepath.Join(parentDir, "top_level.txt")
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
entry := &pythonEntry{
Metadata: FileData{
// build an entry that will later be populated with contents when the request is executed
entry := &pythonPackageData{
Metadata: source.FileData{
Location: metadataLocation,
},
}
request[entry.Metadata.Location] = &entry.Metadata
requester.Add(&entry.Metadata)
if recordLocation != nil {
entry.FileRecord = &FileData{
entry.FileRecord = &source.FileData{
Location: *recordLocation,
}
request[entry.FileRecord.Location] = entry.FileRecord
requester.Add(entry.FileRecord)
}
if topLevelLocation != nil {
entry.TopPackage = &FileData{
entry.TopPackage = &source.FileData{
Location: *topLevelLocation,
}
request[entry.TopPackage.Location] = entry.TopPackage
}
entries = append(entries, entry)
}
return request, entries
requester.Add(entry.TopPackage)
}
func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error {
var locations []source.Location
for l := range request {
locations = append(locations, l)
// keep the entry for processing later
entries[i] = entry
}
response, err := resolver.MultipleFileContentsByLocation(locations)
if err != nil {
return err
}
for l, contents := range response {
request[l].Contents = contents
}
return nil
// return the set of entries and execute the request for fetching contents
return entries, requester.Execute(resolver)
}
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) {
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
if err != nil {
return nil, err
@ -161,7 +142,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package,
}
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) (*pkg.PythonPackageMetadata, []source.Location, error) {
var sources = []source.Location{entry.Metadata.Location}
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents))
@ -189,7 +170,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
@ -209,9 +190,8 @@ func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.Python
}
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) {
func (c *PackageCataloger) fetchTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) {
if entry == nil {
// TODO
log.Warnf("missing python package top_level.txt (package=!!)")
return nil, nil, nil
}

View File

@ -8,12 +8,15 @@ import (
"strings"
"testing"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
// TODO: make this generic (based on maps of source.FileData) and make a generic mock to move to the source pkg
type pythonTestResolverMock struct {
metadataReader io.Reader
recordReader io.Reader
@ -68,21 +71,21 @@ func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMo
}
}
func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (string, error) {
func (r *pythonTestResolverMock) FileContentsByLocation(location source.Location) (string, error) {
switch {
case r.topLevelRef != nil && ref.Path == r.topLevelRef.Path:
case r.topLevelRef != nil && location.Path == r.topLevelRef.Path:
b, err := ioutil.ReadAll(r.topLevelReader)
if err != nil {
return "", err
}
return string(b), nil
case ref.Path == r.metadataRef.Path:
case location.Path == r.metadataRef.Path:
b, err := ioutil.ReadAll(r.metadataReader)
if err != nil {
return "", err
}
return string(b), nil
case ref.Path == r.recordRef.Path:
case location.Path == r.recordRef.Path:
b, err := ioutil.ReadAll(r.recordReader)
if err != nil {
return "", err
@ -92,16 +95,36 @@ func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (st
return "", fmt.Errorf("invalid value given")
}
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(_ []source.Location) (map[source.Location]string, error) {
return nil, fmt.Errorf("not implemented")
// MultipleFileContentsByLocation resolves each requested location one at a time via FileContentsByLocation
// and returns the contents keyed by location. The first lookup failure aborts the whole request and is
// returned to the caller together with a nil map.
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(locations []source.Location) (map[source.Location]string, error) {
	contentsByLocation := make(map[source.Location]string)
	for idx := range locations {
		contents, err := r.FileContentsByLocation(locations[idx])
		if err != nil {
			return nil, err
		}
		contentsByLocation[locations[idx]] = contents
	}
	return contentsByLocation, nil
}
// FilesByPath is not supported by this mock: it ignores its arguments and always returns a
// "not implemented" error.
func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) {
return nil, fmt.Errorf("not implemented")
}
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
return nil, fmt.Errorf("not implemented")
// FilesByGlob returns the mock's known locations (top-level, metadata, and record references, in that
// order) whose path matches the given glob patterns. Patterns are evaluated in the order given, and a
// location that matches several patterns is returned once per matching pattern. The returned error is
// always nil.
func (r *pythonTestResolverMock) FilesByGlob(patterns ...string) ([]source.Location, error) {
	// the set of candidates is the same for every pattern; references that were never set stay nil
	candidates := []*source.Location{r.topLevelRef, r.metadataRef, r.recordRef}

	var matched []source.Location
	for _, pattern := range patterns {
		for _, candidate := range candidates {
			if candidate != nil && file.GlobMatch(pattern, candidate.Path) {
				matched = append(matched, *candidate)
			}
		}
	}
	return matched, nil
}
func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location {
switch {
@ -224,14 +247,16 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
}
// end patching expected values with runtime data...
pyPkgCataloger := NewPythonPackageCataloger()
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)
actual, err := NewPythonPackageCataloger().Catalog(resolver)
if err != nil {
t.Fatalf("failed to catalog python package: %+v", err)
}
for _, d := range deep.Equal(actual, &test.ExpectedPackage) {
if len(actual) != 1 {
t.Fatalf("unexpected length: %d", len(actual))
}
for _, d := range deep.Equal(actual[0], test.ExpectedPackage) {
t.Errorf("diff: %+v", d)
}
})

View File

@ -0,0 +1,48 @@
package source
import "sync"
// ContentRequester accumulates FileData entries, grouped by Location, so that the contents for all of them
// can be fetched from a ContentResolver with a single call (see Execute).
type ContentRequester struct {
// request maps each Location to every FileData entry that was registered for that location.
request map[Location][]*FileData
// lock is held by Add and Execute for the duration of their access to request.
lock sync.Mutex
}
// NewContentRequester creates a ContentRequester with an empty request set and registers each of the
// given FileData entries with it (in argument order) via Add.
func NewContentRequester(data ...*FileData) *ContentRequester {
	requester := new(ContentRequester)
	requester.request = make(map[Location][]*FileData)

	for idx := range data {
		requester.Add(data[idx])
	}
	return requester
}
// Add registers the given FileData so that a later call to Execute populates its Contents. Several
// entries may be registered for the same Location; each of them receives the fetched contents.
//
// A nil entry is ignored rather than dereferenced, and the underlying map is created on first use so
// that a zero-value ContentRequester (one not built by NewContentRequester) is also usable.
func (b *ContentRequester) Add(data *FileData) {
	// nothing to request for a nil entry (reading data.Location would panic)
	if data == nil {
		return
	}

	b.lock.Lock()
	defer b.lock.Unlock()

	// writing to a nil map panics, so lazily allocate it for zero-value requesters
	if b.request == nil {
		b.request = make(map[Location][]*FileData)
	}
	b.request[data.Location] = append(b.request[data.Location], data)
}
// Execute fetches the contents for every location registered so far with a single call to the resolver's
// MultipleFileContentsByLocation, then writes each returned content string into all FileData entries
// registered under the matching location. An error from the resolver is returned as-is, in which case no
// entry is modified.
func (b *ContentRequester) Execute(resolver ContentResolver) error {
	b.lock.Lock()
	defer b.lock.Unlock()

	// gather the distinct locations that have been requested
	locations := make([]Location, 0, len(b.request))
	for location := range b.request {
		locations = append(locations, location)
	}

	response, err := resolver.MultipleFileContentsByLocation(locations)
	if err != nil {
		return err
	}

	// fan each result back out to every entry that asked for that location
	for location, contents := range response {
		for _, data := range b.request[location] {
			data.Contents = contents
		}
	}
	return nil
}

6
syft/source/file_data.go Normal file
View File

@ -0,0 +1,6 @@
package source
// FileData pairs a file Location with the contents read from that location.
type FileData struct {
// Location identifies the file this data belongs to.
Location Location
// Contents holds the file contents as a string; ContentRequester.Execute fills it in from the resolver response.
Contents string
}