From 7d55bca0a0cbaee89bf11c29a8fce16201df1229 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 22 Oct 2020 13:50:24 -0400 Subject: [PATCH] allow for python metadata fields to be optional Signed-off-by: Alex Goodman --- schema/json/schema.json | 9 ++ syft/cataloger/python/package_cataloger.go | 104 +++++++++++------- .../python/package_cataloger_test.go | 19 ++-- .../python/parse_wheel_egg_record.go | 2 +- .../python/parse_wheel_egg_record_test.go | 14 +-- syft/pkg/python_package_metadata.go | 6 +- 6 files changed, 97 insertions(+), 57 deletions(-) diff --git a/schema/json/schema.json b/schema/json/schema.json index 83d0a2508..440b271fc 100644 --- a/schema/json/schema.json +++ b/schema/json/schema.json @@ -491,6 +491,9 @@ "release": { "type": "string" }, + "sitePackagesRootPath": { + "type": "string" + }, "size": { "type": "integer" }, @@ -500,6 +503,12 @@ "sourceRpm": { "type": "string" }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array" + }, "url": { "type": "string" }, diff --git a/syft/cataloger/python/package_cataloger.go b/syft/cataloger/python/package_cataloger.go index cb67a2f76..eaf76abe1 100644 --- a/syft/cataloger/python/package_cataloger.go +++ b/syft/cataloger/python/package_cataloger.go @@ -25,10 +25,12 @@ func NewPythonPackageCataloger() *PackageCataloger { return &PackageCataloger{} } +// Name returns a string that uniquely describes a cataloger func (c *PackageCataloger) Name() string { return "python-package-cataloger" } +// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations. func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { // nolint:prealloc var fileMatches []file.Reference @@ -54,19 +56,33 @@ func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, erro return pkgs, nil } -func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) { - var sources = []file.Reference{metadataRef} - - metadataContents, err := resolver.FileContentsByRef(metadataRef) +// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. +func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) { + metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef) if err != nil { - return nil, nil, err + return nil, err } - metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents)) - if err != nil { - return nil, nil, err + var licenses []string + if metadata.License != "" { + licenses = []string{metadata.License} } + return &pkg.Package{ + Name: metadata.Name, + Version: metadata.Version, + FoundBy: c.Name(), + Source: sources, + Licenses: licenses, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonPackageMetadataType, + Metadata: *metadata, + }, nil +} + +// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. +func (c *PackageCataloger) fetchRecordFiles(resolver scope.Resolver, metadataRef file.Reference) (files []pkg.PythonFileRecord, sources []file.Reference, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). @@ -92,10 +108,13 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m return nil, nil, err } - // append the record files list to the metadata - metadata.Files = records + files = append(files, records...) } + return files, sources, nil +} +// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. +func (c *PackageCataloger) fetchTopLevelPackages(resolver scope.Resolver, metadataRef file.Reference) (pkgs []string, sources []file.Reference, err error) { // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages parentDir := filepath.Dir(string(metadataRef.Path)) topLevelPath := filepath.Join(parentDir, "top_level.txt") @@ -107,47 +126,54 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, m return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path)) } + sources = append(sources, *topLevelRef) + topLevelContents, err := resolver.FileContentsByRef(*topLevelRef) if err != nil { return nil, nil, err } - // nolint:prealloc - var topLevelPackages []string + scanner := bufio.NewScanner(strings.NewReader(topLevelContents)) for scanner.Scan() { - topLevelPackages = append(topLevelPackages, scanner.Text()) + pkgs = append(pkgs, scanner.Text()) } if err := scanner.Err(); err != nil { return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err) } - metadata.TopLevelPackages = topLevelPackages + return pkgs, sources, nil +} + +// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. +func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) { + var sources = []file.Reference{metadataRef} + + metadataContents, err := resolver.FileContentsByRef(metadataRef) + if err != nil { + return nil, nil, err + } + + metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents)) + if err != nil { + return nil, nil, err + } + + // attach any python files found for the given wheel/egg installation + r, s, err := c.fetchRecordFiles(resolver, metadataRef) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.Files = r + + // attach any top-level package names found for the given wheel/egg installation + p, s, err := c.fetchTopLevelPackages(resolver, metadataRef) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.TopLevelPackages = p return &metadata, sources, nil } - -func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) { - - metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef) - if err != nil { - return nil, err - } - - var licenses []string - if metadata.License != "" { - licenses = []string{metadata.License} - } - - return &pkg.Package{ - Name: metadata.Name, - Version: metadata.Version, - FoundBy: c.Name(), - Source: sources, - Licenses: licenses, - Language: pkg.Python, - Type: pkg.PythonPkg, - MetadataType: pkg.PythonPackageMetadataType, - Metadata: *metadata, - }, nil -} diff --git a/syft/cataloger/python/package_cataloger_test.go b/syft/cataloger/python/package_cataloger_test.go index 63001a041..a3f10091e 100644 --- a/syft/cataloger/python/package_cataloger_test.go +++ b/syft/cataloger/python/package_cataloger_test.go @@ -142,12 +142,12 @@ func TestPythonPackageWheelCataloger(t *testing.T) { AuthorEmail: "me@kennethreitz.org", SitePackagesRootPath: "test-fixtures", Files: []pkg.PythonFileRecord{ - {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, - {Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, + {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, + {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, {Path: "requests/__pycache__/__version__.cpython-38.pyc"}, {Path: "requests/__pycache__/utils.cpython-38.pyc"}, - {Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, - {Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, + {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, + {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, }, TopLevelPackages: []string{"requests"}, }, @@ -174,11 +174,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) { AuthorEmail: "georg@python.org", SitePackagesRootPath: "test-fixtures", Files: []pkg.PythonFileRecord{ - {Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, - {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, + {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, + {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, {Path: "Pygments-2.6.1.dist-info/RECORD"}, {Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, - {Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, + {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, }, TopLevelPackages: []string{"pygments", "something_else"}, }, @@ -220,6 +220,11 @@ func TestPythonPackageWheelCataloger(t *testing.T) { test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef) } + if resolver.topLevelRef != nil { + test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.topLevelRef) + } + // end patching expected values with runtime data... + pyPkgCataloger := NewPythonPackageCataloger() actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef) diff --git a/syft/cataloger/python/parse_wheel_egg_record.go b/syft/cataloger/python/parse_wheel_egg_record.go index 5a25c7e7f..42faafa3f 100644 --- a/syft/cataloger/python/parse_wheel_egg_record.go +++ b/syft/cataloger/python/parse_wheel_egg_record.go @@ -44,7 +44,7 @@ func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) { return nil, fmt.Errorf("unexpected python record digest: %q", item) } - record.Digest = pkg.Digest{ + record.Digest = &pkg.Digest{ Algorithm: fields[0], Value: fields[1], } diff --git a/syft/cataloger/python/parse_wheel_egg_record_test.go b/syft/cataloger/python/parse_wheel_egg_record_test.go index 515ffdf9b..d14868e0f 100644 --- a/syft/cataloger/python/parse_wheel_egg_record_test.go +++ b/syft/cataloger/python/parse_wheel_egg_record_test.go @@ -16,22 +16,22 @@ func TestParseWheelEggRecord(t *testing.T) { { Fixture: "test-fixtures/egg-info/RECORD", ExpectedMetadata: []pkg.PythonFileRecord{ - {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, - {Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, + {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, + {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, {Path: "requests/__pycache__/__version__.cpython-38.pyc"}, {Path: "requests/__pycache__/utils.cpython-38.pyc"}, - {Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, - {Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, + {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, + {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, }, }, { Fixture: "test-fixtures/dist-info/RECORD", ExpectedMetadata: []pkg.PythonFileRecord{ - {Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, - {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, + {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, + {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, {Path: "Pygments-2.6.1.dist-info/RECORD"}, {Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, - {Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, + {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, }, }, } diff --git a/syft/pkg/python_package_metadata.go b/syft/pkg/python_package_metadata.go index 8fe5340b6..637e6220c 100644 --- a/syft/pkg/python_package_metadata.go +++ b/syft/pkg/python_package_metadata.go @@ -7,9 +7,9 @@ type Digest struct { // PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package type PythonFileRecord struct { - Path string `json:"path"` - Digest Digest `json:"digest"` - Size string `json:"size"` + Path string `json:"path"` + Digest *Digest `json:"digest,omitempty"` + Size string `json:"size,omitempty"` } // PythonPackageMetadata represents all captured data for a python egg or wheel package.