mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 08:53:15 +01:00
add content requester and refactor python cataloger to use it
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
82c8a8e17b
commit
e4a3e433b6
@ -18,6 +18,12 @@ const (
|
|||||||
wheelMetadataGlob = "**/*dist-info/METADATA"
|
wheelMetadataGlob = "**/*dist-info/METADATA"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type pythonPackageData struct {
|
||||||
|
Metadata source.FileData
|
||||||
|
FileRecord *source.FileData
|
||||||
|
TopPackage *source.FileData
|
||||||
|
}
|
||||||
|
|
||||||
type PackageCataloger struct{}
|
type PackageCataloger struct{}
|
||||||
|
|
||||||
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
|
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
|
||||||
@ -32,55 +38,43 @@ func (c *PackageCataloger) Name() string {
|
|||||||
|
|
||||||
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
|
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
|
||||||
func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
|
func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
|
||||||
// nolint:prealloc
|
entries, err := c.getPythonPackageEntries(resolver)
|
||||||
var fileMatches []source.Location
|
|
||||||
|
|
||||||
for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} {
|
|
||||||
matches, err := resolver.FilesByGlob(glob)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to find files by glob: %s", glob)
|
|
||||||
}
|
|
||||||
fileMatches = append(fileMatches, matches...)
|
|
||||||
}
|
|
||||||
|
|
||||||
request, entries := filesOfInterest(resolver, fileMatches)
|
|
||||||
if err := getContents(resolver, request); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var pkgs []pkg.Package
|
var packages []pkg.Package
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
p, err := c.catalogEggOrWheel(entry)
|
p, err := c.catalogEggOrWheel(entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
|
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
|
||||||
}
|
}
|
||||||
if p != nil {
|
if p != nil {
|
||||||
pkgs = append(pkgs, *p)
|
packages = append(packages, *p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return pkgs, nil
|
return packages, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type FileData struct {
|
func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([]*pythonPackageData, error) {
|
||||||
Location source.Location
|
var metadataLocations []source.Location
|
||||||
Contents string
|
|
||||||
|
// find all primary record paths
|
||||||
|
matches, err := resolver.FilesByGlob(eggMetadataGlob, wheelMetadataGlob)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to find files by glob: %w", err)
|
||||||
}
|
}
|
||||||
|
metadataLocations = append(metadataLocations, matches...)
|
||||||
|
|
||||||
type pythonEntry struct {
|
// for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record
|
||||||
Metadata FileData
|
var requester = source.NewContentRequester()
|
||||||
FileRecord *FileData
|
var entries = make([]*pythonPackageData, len(metadataLocations))
|
||||||
TopPackage *FileData
|
for i, metadataLocation := range metadataLocations {
|
||||||
}
|
|
||||||
|
|
||||||
func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) {
|
|
||||||
var request = make(map[source.Location]*FileData)
|
|
||||||
var entries []*pythonEntry
|
|
||||||
for _, metadataLocation := range metadataLocations {
|
|
||||||
|
|
||||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer). The same is true with
|
||||||
|
// the top_level.txt file.
|
||||||
|
|
||||||
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
|
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
|
||||||
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
|
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
|
||||||
@ -91,52 +85,39 @@ func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Lo
|
|||||||
topLevelPath := filepath.Join(parentDir, "top_level.txt")
|
topLevelPath := filepath.Join(parentDir, "top_level.txt")
|
||||||
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
|
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
|
||||||
|
|
||||||
entry := &pythonEntry{
|
// build an entry that will later be populated with contents when the request is executed
|
||||||
Metadata: FileData{
|
entry := &pythonPackageData{
|
||||||
|
Metadata: source.FileData{
|
||||||
Location: metadataLocation,
|
Location: metadataLocation,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
request[entry.Metadata.Location] = &entry.Metadata
|
requester.Add(&entry.Metadata)
|
||||||
|
|
||||||
if recordLocation != nil {
|
if recordLocation != nil {
|
||||||
entry.FileRecord = &FileData{
|
entry.FileRecord = &source.FileData{
|
||||||
Location: *recordLocation,
|
Location: *recordLocation,
|
||||||
}
|
}
|
||||||
request[entry.FileRecord.Location] = entry.FileRecord
|
requester.Add(entry.FileRecord)
|
||||||
}
|
}
|
||||||
|
|
||||||
if topLevelLocation != nil {
|
if topLevelLocation != nil {
|
||||||
entry.TopPackage = &FileData{
|
entry.TopPackage = &source.FileData{
|
||||||
Location: *topLevelLocation,
|
Location: *topLevelLocation,
|
||||||
}
|
}
|
||||||
request[entry.TopPackage.Location] = entry.TopPackage
|
requester.Add(entry.TopPackage)
|
||||||
}
|
|
||||||
entries = append(entries, entry)
|
|
||||||
|
|
||||||
}
|
|
||||||
return request, entries
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error {
|
// keep the entry for processing later
|
||||||
var locations []source.Location
|
entries[i] = entry
|
||||||
for l := range request {
|
|
||||||
locations = append(locations, l)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
response, err := resolver.MultipleFileContentsByLocation(locations)
|
// return the set of entries and execute the request for fetching contents
|
||||||
if err != nil {
|
return entries, requester.Execute(resolver)
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for l, contents := range response {
|
|
||||||
request[l].Contents = contents
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
|
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
|
||||||
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) {
|
func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Package, error) {
|
||||||
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
|
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -161,7 +142,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
|
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
|
||||||
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
||||||
var sources = []source.Location{entry.Metadata.Location}
|
var sources = []source.Location{entry.Metadata.Location}
|
||||||
|
|
||||||
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents))
|
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents))
|
||||||
@ -189,7 +170,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
|
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
|
||||||
func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
||||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
||||||
@ -209,9 +190,8 @@ func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.Python
|
|||||||
}
|
}
|
||||||
|
|
||||||
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
|
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
|
||||||
func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) {
|
func (c *PackageCataloger) fetchTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) {
|
||||||
if entry == nil {
|
if entry == nil {
|
||||||
// TODO
|
|
||||||
log.Warnf("missing python package top_level.txt (package=!!)")
|
log.Warnf("missing python package top_level.txt (package=!!)")
|
||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -8,12 +8,15 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/file"
|
||||||
|
|
||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
|
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
"github.com/go-test/deep"
|
"github.com/go-test/deep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO: make this generic (based on maps of source.FileData) and make a generic mock to move to the source pkg
|
||||||
type pythonTestResolverMock struct {
|
type pythonTestResolverMock struct {
|
||||||
metadataReader io.Reader
|
metadataReader io.Reader
|
||||||
recordReader io.Reader
|
recordReader io.Reader
|
||||||
@ -68,21 +71,21 @@ func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (string, error) {
|
func (r *pythonTestResolverMock) FileContentsByLocation(location source.Location) (string, error) {
|
||||||
switch {
|
switch {
|
||||||
case r.topLevelRef != nil && ref.Path == r.topLevelRef.Path:
|
case r.topLevelRef != nil && location.Path == r.topLevelRef.Path:
|
||||||
b, err := ioutil.ReadAll(r.topLevelReader)
|
b, err := ioutil.ReadAll(r.topLevelReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return string(b), nil
|
return string(b), nil
|
||||||
case ref.Path == r.metadataRef.Path:
|
case location.Path == r.metadataRef.Path:
|
||||||
b, err := ioutil.ReadAll(r.metadataReader)
|
b, err := ioutil.ReadAll(r.metadataReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return string(b), nil
|
return string(b), nil
|
||||||
case ref.Path == r.recordRef.Path:
|
case location.Path == r.recordRef.Path:
|
||||||
b, err := ioutil.ReadAll(r.recordReader)
|
b, err := ioutil.ReadAll(r.recordReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
@ -92,16 +95,36 @@ func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (st
|
|||||||
return "", fmt.Errorf("invalid value given")
|
return "", fmt.Errorf("invalid value given")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(_ []source.Location) (map[source.Location]string, error) {
|
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(locations []source.Location) (map[source.Location]string, error) {
|
||||||
return nil, fmt.Errorf("not implemented")
|
var results = make(map[source.Location]string)
|
||||||
|
var err error
|
||||||
|
for _, l := range locations {
|
||||||
|
results[l], err = r.FileContentsByLocation(l)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) {
|
func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) {
|
||||||
return nil, fmt.Errorf("not implemented")
|
return nil, fmt.Errorf("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
|
func (r *pythonTestResolverMock) FilesByGlob(patterns ...string) ([]source.Location, error) {
|
||||||
return nil, fmt.Errorf("not implemented")
|
var results []source.Location
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
for _, l := range []*source.Location{r.topLevelRef, r.metadataRef, r.recordRef} {
|
||||||
|
if l == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if file.GlobMatch(pattern, l.Path) {
|
||||||
|
results = append(results, *l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results, nil
|
||||||
}
|
}
|
||||||
func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location {
|
func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location {
|
||||||
switch {
|
switch {
|
||||||
@ -224,14 +247,16 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
|
|||||||
}
|
}
|
||||||
// end patching expected values with runtime data...
|
// end patching expected values with runtime data...
|
||||||
|
|
||||||
pyPkgCataloger := NewPythonPackageCataloger()
|
actual, err := NewPythonPackageCataloger().Catalog(resolver)
|
||||||
|
|
||||||
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to catalog python package: %+v", err)
|
t.Fatalf("failed to catalog python package: %+v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, d := range deep.Equal(actual, &test.ExpectedPackage) {
|
if len(actual) != 1 {
|
||||||
|
t.Fatalf("unexpected length: %d", len(actual))
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, d := range deep.Equal(actual[0], test.ExpectedPackage) {
|
||||||
t.Errorf("diff: %+v", d)
|
t.Errorf("diff: %+v", d)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
48
syft/source/content_requester.go
Normal file
48
syft/source/content_requester.go
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
package source
|
||||||
|
|
||||||
|
import "sync"
|
||||||
|
|
||||||
|
type ContentRequester struct {
|
||||||
|
request map[Location][]*FileData
|
||||||
|
lock sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewContentRequester(data ...*FileData) *ContentRequester {
|
||||||
|
requester := &ContentRequester{
|
||||||
|
request: make(map[Location][]*FileData),
|
||||||
|
}
|
||||||
|
for _, d := range data {
|
||||||
|
requester.Add(d)
|
||||||
|
}
|
||||||
|
return requester
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *ContentRequester) Add(data *FileData) {
|
||||||
|
b.lock.Lock()
|
||||||
|
defer b.lock.Unlock()
|
||||||
|
b.request[data.Location] = append(b.request[data.Location], data)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *ContentRequester) Execute(resolver ContentResolver) error {
|
||||||
|
b.lock.Lock()
|
||||||
|
defer b.lock.Unlock()
|
||||||
|
|
||||||
|
var locations = make([]Location, len(b.request))
|
||||||
|
idx := 0
|
||||||
|
for l := range b.request {
|
||||||
|
locations[idx] = l
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
|
||||||
|
response, err := resolver.MultipleFileContentsByLocation(locations)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for l, contents := range response {
|
||||||
|
for i := range b.request[l] {
|
||||||
|
b.request[l][i].Contents = contents
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
6
syft/source/file_data.go
Normal file
6
syft/source/file_data.go
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
package source
|
||||||
|
|
||||||
|
type FileData struct {
|
||||||
|
Location Location
|
||||||
|
Contents string
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user