mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 00:43:20 +01:00
add content requester and refactor python cataloger to use it
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
82c8a8e17b
commit
e4a3e433b6
@ -18,6 +18,12 @@ const (
|
||||
wheelMetadataGlob = "**/*dist-info/METADATA"
|
||||
)
|
||||
|
||||
type pythonPackageData struct {
|
||||
Metadata source.FileData
|
||||
FileRecord *source.FileData
|
||||
TopPackage *source.FileData
|
||||
}
|
||||
|
||||
type PackageCataloger struct{}
|
||||
|
||||
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
|
||||
@ -32,55 +38,43 @@ func (c *PackageCataloger) Name() string {
|
||||
|
||||
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
|
||||
func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
|
||||
// nolint:prealloc
|
||||
var fileMatches []source.Location
|
||||
|
||||
for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} {
|
||||
matches, err := resolver.FilesByGlob(glob)
|
||||
entries, err := c.getPythonPackageEntries(resolver)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find files by glob: %s", glob)
|
||||
}
|
||||
fileMatches = append(fileMatches, matches...)
|
||||
}
|
||||
|
||||
request, entries := filesOfInterest(resolver, fileMatches)
|
||||
if err := getContents(resolver, request); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pkgs []pkg.Package
|
||||
var packages []pkg.Package
|
||||
for _, entry := range entries {
|
||||
p, err := c.catalogEggOrWheel(entry)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
|
||||
}
|
||||
if p != nil {
|
||||
pkgs = append(pkgs, *p)
|
||||
packages = append(packages, *p)
|
||||
}
|
||||
}
|
||||
|
||||
return pkgs, nil
|
||||
return packages, nil
|
||||
}
|
||||
|
||||
type FileData struct {
|
||||
Location source.Location
|
||||
Contents string
|
||||
func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([]*pythonPackageData, error) {
|
||||
var metadataLocations []source.Location
|
||||
|
||||
// find all primary record paths
|
||||
matches, err := resolver.FilesByGlob(eggMetadataGlob, wheelMetadataGlob)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find files by glob: %w", err)
|
||||
}
|
||||
metadataLocations = append(metadataLocations, matches...)
|
||||
|
||||
type pythonEntry struct {
|
||||
Metadata FileData
|
||||
FileRecord *FileData
|
||||
TopPackage *FileData
|
||||
}
|
||||
|
||||
func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) {
|
||||
var request = make(map[source.Location]*FileData)
|
||||
var entries []*pythonEntry
|
||||
for _, metadataLocation := range metadataLocations {
|
||||
|
||||
// for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record
|
||||
var requester = source.NewContentRequester()
|
||||
var entries = make([]*pythonPackageData, len(metadataLocations))
|
||||
for i, metadataLocation := range metadataLocations {
|
||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer). The same is true with
|
||||
// the top_level.txt file.
|
||||
|
||||
// let's find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
|
||||
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
|
||||
@ -91,52 +85,39 @@ func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Lo
|
||||
topLevelPath := filepath.Join(parentDir, "top_level.txt")
|
||||
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
|
||||
|
||||
entry := &pythonEntry{
|
||||
Metadata: FileData{
|
||||
// build an entry that will later be populated with contents when the request is executed
|
||||
entry := &pythonPackageData{
|
||||
Metadata: source.FileData{
|
||||
Location: metadataLocation,
|
||||
},
|
||||
}
|
||||
|
||||
request[entry.Metadata.Location] = &entry.Metadata
|
||||
requester.Add(&entry.Metadata)
|
||||
|
||||
if recordLocation != nil {
|
||||
entry.FileRecord = &FileData{
|
||||
entry.FileRecord = &source.FileData{
|
||||
Location: *recordLocation,
|
||||
}
|
||||
request[entry.FileRecord.Location] = entry.FileRecord
|
||||
requester.Add(entry.FileRecord)
|
||||
}
|
||||
|
||||
if topLevelLocation != nil {
|
||||
entry.TopPackage = &FileData{
|
||||
entry.TopPackage = &source.FileData{
|
||||
Location: *topLevelLocation,
|
||||
}
|
||||
request[entry.TopPackage.Location] = entry.TopPackage
|
||||
}
|
||||
entries = append(entries, entry)
|
||||
|
||||
}
|
||||
return request, entries
|
||||
requester.Add(entry.TopPackage)
|
||||
}
|
||||
|
||||
func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error {
|
||||
var locations []source.Location
|
||||
for l := range request {
|
||||
locations = append(locations, l)
|
||||
// keep the entry for processing later
|
||||
entries[i] = entry
|
||||
}
|
||||
|
||||
response, err := resolver.MultipleFileContentsByLocation(locations)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for l, contents := range response {
|
||||
request[l].Contents = contents
|
||||
}
|
||||
return nil
|
||||
// return the set of entries and execute the request for fetching contents
|
||||
return entries, requester.Execute(resolver)
|
||||
}
|
||||
|
||||
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
|
||||
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) {
|
||||
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Package, error) {
|
||||
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -161,7 +142,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package,
|
||||
}
|
||||
|
||||
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
|
||||
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
||||
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
||||
var sources = []source.Location{entry.Metadata.Location}
|
||||
|
||||
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents))
|
||||
@ -189,7 +170,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.
|
||||
}
|
||||
|
||||
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
|
||||
func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
||||
func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
||||
@ -209,9 +190,8 @@ func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.Python
|
||||
}
|
||||
|
||||
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
|
||||
func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) {
|
||||
func (c *PackageCataloger) fetchTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) {
|
||||
if entry == nil {
|
||||
// TODO
|
||||
log.Warnf("missing python package top_level.txt (package=!!)")
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
@ -8,12 +8,15 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/anchore/syft/internal/file"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/go-test/deep"
|
||||
)
|
||||
|
||||
// TODO: make this generic (based on maps of source.FileData) and make a generic mock to move to the source pkg
|
||||
type pythonTestResolverMock struct {
|
||||
metadataReader io.Reader
|
||||
recordReader io.Reader
|
||||
@ -68,21 +71,21 @@ func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMo
|
||||
}
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (string, error) {
|
||||
func (r *pythonTestResolverMock) FileContentsByLocation(location source.Location) (string, error) {
|
||||
switch {
|
||||
case r.topLevelRef != nil && ref.Path == r.topLevelRef.Path:
|
||||
case r.topLevelRef != nil && location.Path == r.topLevelRef.Path:
|
||||
b, err := ioutil.ReadAll(r.topLevelReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
case ref.Path == r.metadataRef.Path:
|
||||
case location.Path == r.metadataRef.Path:
|
||||
b, err := ioutil.ReadAll(r.metadataReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
case ref.Path == r.recordRef.Path:
|
||||
case location.Path == r.recordRef.Path:
|
||||
b, err := ioutil.ReadAll(r.recordReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@ -92,16 +95,36 @@ func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (st
|
||||
return "", fmt.Errorf("invalid value given")
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(_ []source.Location) (map[source.Location]string, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(locations []source.Location) (map[source.Location]string, error) {
|
||||
var results = make(map[source.Location]string)
|
||||
var err error
|
||||
for _, l := range locations {
|
||||
results[l], err = r.FileContentsByLocation(l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
func (r *pythonTestResolverMock) FilesByGlob(patterns ...string) ([]source.Location, error) {
|
||||
var results []source.Location
|
||||
for _, pattern := range patterns {
|
||||
for _, l := range []*source.Location{r.topLevelRef, r.metadataRef, r.recordRef} {
|
||||
if l == nil {
|
||||
continue
|
||||
}
|
||||
if file.GlobMatch(pattern, l.Path) {
|
||||
results = append(results, *l)
|
||||
}
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location {
|
||||
switch {
|
||||
@ -224,14 +247,16 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
|
||||
}
|
||||
// end patching expected values with runtime data...
|
||||
|
||||
pyPkgCataloger := NewPythonPackageCataloger()
|
||||
|
||||
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)
|
||||
actual, err := NewPythonPackageCataloger().Catalog(resolver)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to catalog python package: %+v", err)
|
||||
}
|
||||
|
||||
for _, d := range deep.Equal(actual, &test.ExpectedPackage) {
|
||||
if len(actual) != 1 {
|
||||
t.Fatalf("unexpected length: %d", len(actual))
|
||||
}
|
||||
|
||||
for _, d := range deep.Equal(actual[0], test.ExpectedPackage) {
|
||||
t.Errorf("diff: %+v", d)
|
||||
}
|
||||
})
|
||||
|
||||
48
syft/source/content_requester.go
Normal file
48
syft/source/content_requester.go
Normal file
@ -0,0 +1,48 @@
|
||||
package source
|
||||
|
||||
import "sync"
|
||||
|
||||
type ContentRequester struct {
|
||||
request map[Location][]*FileData
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
func NewContentRequester(data ...*FileData) *ContentRequester {
|
||||
requester := &ContentRequester{
|
||||
request: make(map[Location][]*FileData),
|
||||
}
|
||||
for _, d := range data {
|
||||
requester.Add(d)
|
||||
}
|
||||
return requester
|
||||
}
|
||||
|
||||
func (b *ContentRequester) Add(data *FileData) {
|
||||
b.lock.Lock()
|
||||
defer b.lock.Unlock()
|
||||
b.request[data.Location] = append(b.request[data.Location], data)
|
||||
}
|
||||
|
||||
func (b *ContentRequester) Execute(resolver ContentResolver) error {
|
||||
b.lock.Lock()
|
||||
defer b.lock.Unlock()
|
||||
|
||||
var locations = make([]Location, len(b.request))
|
||||
idx := 0
|
||||
for l := range b.request {
|
||||
locations[idx] = l
|
||||
idx++
|
||||
}
|
||||
|
||||
response, err := resolver.MultipleFileContentsByLocation(locations)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for l, contents := range response {
|
||||
for i := range b.request[l] {
|
||||
b.request[l][i].Contents = contents
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
6
syft/source/file_data.go
Normal file
6
syft/source/file_data.go
Normal file
@ -0,0 +1,6 @@
|
||||
package source
|
||||
|
||||
type FileData struct {
|
||||
Location Location
|
||||
Contents string
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user