allow for python metadata fields to be optional

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-22 13:50:24 -04:00
parent 2e5ff4a995
commit 7d55bca0a0
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
6 changed files with 97 additions and 57 deletions

View File

@ -491,6 +491,9 @@
"release": { "release": {
"type": "string" "type": "string"
}, },
"sitePackagesRootPath": {
"type": "string"
},
"size": { "size": {
"type": "integer" "type": "integer"
}, },
@ -500,6 +503,12 @@
"sourceRpm": { "sourceRpm": {
"type": "string" "type": "string"
}, },
"topLevelPackages": {
"items": {
"type": "string"
},
"type": "array"
},
"url": { "url": {
"type": "string" "type": "string"
}, },

View File

@ -25,10 +25,12 @@ func NewPythonPackageCataloger() *PackageCataloger {
return &PackageCataloger{} return &PackageCataloger{}
} }
// Name returns a string that uniquely describes a cataloger
func (c *PackageCataloger) Name() string { func (c *PackageCataloger) Name() string {
return "python-package-cataloger" return "python-package-cataloger"
} }
// Catalog is given an object to resolve file references and content; it returns any discovered Packages after analyzing python egg and wheel installations.
func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) {
// nolint:prealloc // nolint:prealloc
var fileMatches []file.Reference var fileMatches []file.Reference
@ -54,19 +56,33 @@ func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, erro
return pkgs, nil return pkgs, nil
} }
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) { // catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
var sources = []file.Reference{metadataRef} func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef)
metadataContents, err := resolver.FileContentsByRef(metadataRef)
if err != nil { if err != nil {
return nil, nil, err return nil, err
} }
metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents)) var licenses []string
if err != nil { if metadata.License != "" {
return nil, nil, err licenses = []string{metadata.License}
} }
return &pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
FoundBy: c.Name(),
Source: sources,
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: *metadata,
}, nil
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records it contains.
func (c *PackageCataloger) fetchRecordFiles(resolver scope.Resolver, metadataRef file.Reference) (files []pkg.PythonFileRecord, sources []file.Reference, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important // or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer). // to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
@ -92,10 +108,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m
return nil, nil, err return nil, nil, err
} }
// append the record files list to the metadata files = append(files, records...)
metadata.Files = records }
return files, sources, nil
} }
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names it contains.
func (c *PackageCataloger) fetchTopLevelPackages(resolver scope.Resolver, metadataRef file.Reference) (pkgs []string, sources []file.Reference, err error) {
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(string(metadataRef.Path)) parentDir := filepath.Dir(string(metadataRef.Path))
topLevelPath := filepath.Join(parentDir, "top_level.txt") topLevelPath := filepath.Join(parentDir, "top_level.txt")
@ -107,47 +126,54 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m
return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path)) return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path))
} }
sources = append(sources, *topLevelRef)
topLevelContents, err := resolver.FileContentsByRef(*topLevelRef) topLevelContents, err := resolver.FileContentsByRef(*topLevelRef)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
// nolint:prealloc
var topLevelPackages []string
scanner := bufio.NewScanner(strings.NewReader(topLevelContents)) scanner := bufio.NewScanner(strings.NewReader(topLevelContents))
for scanner.Scan() { for scanner.Scan() {
topLevelPackages = append(topLevelPackages, scanner.Text()) pkgs = append(pkgs, scanner.Text())
} }
if err := scanner.Err(); err != nil { if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err) return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err)
} }
metadata.TopLevelPackages = topLevelPackages return pkgs, sources, nil
}
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as the list of files from which the metadata was derived.
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) {
var sources = []file.Reference{metadataRef}
metadataContents, err := resolver.FileContentsByRef(metadataRef)
if err != nil {
return nil, nil, err
}
metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents))
if err != nil {
return nil, nil, err
}
// attach any python files found for the given wheel/egg installation
r, s, err := c.fetchRecordFiles(resolver, metadataRef)
if err != nil {
return nil, nil, err
}
sources = append(sources, s...)
metadata.Files = r
// attach any top-level package names found for the given wheel/egg installation
p, s, err := c.fetchTopLevelPackages(resolver, metadataRef)
if err != nil {
return nil, nil, err
}
sources = append(sources, s...)
metadata.TopLevelPackages = p
return &metadata, sources, nil return &metadata, sources, nil
} }
func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef)
if err != nil {
return nil, err
}
var licenses []string
if metadata.License != "" {
licenses = []string{metadata.License}
}
return &pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
FoundBy: c.Name(),
Source: sources,
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: *metadata,
}, nil
}

View File

@ -142,12 +142,12 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
AuthorEmail: "me@kennethreitz.org", AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures", SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{ Files: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests/__pycache__/__version__.cpython-38.pyc"}, {Path: "requests/__pycache__/__version__.cpython-38.pyc"},
{Path: "requests/__pycache__/utils.cpython-38.pyc"}, {Path: "requests/__pycache__/utils.cpython-38.pyc"},
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
}, },
TopLevelPackages: []string{"requests"}, TopLevelPackages: []string{"requests"},
}, },
@ -174,11 +174,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
AuthorEmail: "georg@python.org", AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures", SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{ Files: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "Pygments-2.6.1.dist-info/RECORD"}, {Path: "Pygments-2.6.1.dist-info/RECORD"},
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, {Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
}, },
TopLevelPackages: []string{"pygments", "something_else"}, TopLevelPackages: []string{"pygments", "something_else"},
}, },
@ -220,6 +220,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef) test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef)
} }
if resolver.topLevelRef != nil {
test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.topLevelRef)
}
// end patching expected values with runtime data...
pyPkgCataloger := NewPythonPackageCataloger() pyPkgCataloger := NewPythonPackageCataloger()
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef) actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)

View File

@ -44,7 +44,7 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
return nil, fmt.Errorf("unexpected python record digest: %q", item) return nil, fmt.Errorf("unexpected python record digest: %q", item)
} }
record.Digest = pkg.Digest{ record.Digest = &pkg.Digest{
Algorithm: fields[0], Algorithm: fields[0],
Value: fields[1], Value: fields[1],
} }

View File

@ -16,22 +16,22 @@ func TestParseWheelEggRecord(t *testing.T) {
{ {
Fixture: "test-fixtures/egg-info/RECORD", Fixture: "test-fixtures/egg-info/RECORD",
ExpectedMetadata: []pkg.PythonFileRecord{ ExpectedMetadata: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests/__pycache__/__version__.cpython-38.pyc"}, {Path: "requests/__pycache__/__version__.cpython-38.pyc"},
{Path: "requests/__pycache__/utils.cpython-38.pyc"}, {Path: "requests/__pycache__/utils.cpython-38.pyc"},
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
}, },
}, },
{ {
Fixture: "test-fixtures/dist-info/RECORD", Fixture: "test-fixtures/dist-info/RECORD",
ExpectedMetadata: []pkg.PythonFileRecord{ ExpectedMetadata: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "Pygments-2.6.1.dist-info/RECORD"}, {Path: "Pygments-2.6.1.dist-info/RECORD"},
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, {Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
}, },
}, },
} }

View File

@ -8,8 +8,8 @@ type Digest struct {
// PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package // PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package
type PythonFileRecord struct { type PythonFileRecord struct {
Path string `json:"path"` Path string `json:"path"`
Digest Digest `json:"digest"` Digest *Digest `json:"digest,omitempty"`
Size string `json:"size"` Size string `json:"size,omitempty"`
} }
// PythonPackageMetadata represents all captured data for a python egg or wheel package. // PythonPackageMetadata represents all captured data for a python egg or wheel package.