add test coverage for python package cataloger and update catalog interface

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-22 09:50:31 -04:00
parent 7fc926d40d
commit 1414d1fbc3
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
30 changed files with 596 additions and 156 deletions

View File

@ -43,6 +43,9 @@
"author": { "author": {
"type": "string" "type": "string"
}, },
"authorEmail": {
"type": "string"
},
"description": { "description": {
"type": "string" "type": "string"
}, },
@ -65,6 +68,21 @@
"checksum": { "checksum": {
"type": "string" "type": "string"
}, },
"digest": {
"properties": {
"algorithm": {
"type": "string"
},
"value": {
"type": "string"
}
},
"required": [
"algorithm",
"value"
],
"type": "object"
},
"ownerGid": { "ownerGid": {
"type": "string" "type": "string"
}, },
@ -76,14 +94,13 @@
}, },
"permissions": { "permissions": {
"type": "string" "type": "string"
},
"size": {
"type": "string"
} }
}, },
"required": [ "required": [
"checksum", "path"
"ownerGid",
"ownerUid",
"path",
"permissions"
], ],
"type": "object" "type": "object"
} }
@ -403,6 +420,9 @@
], ],
"type": "object" "type": "object"
}, },
"metadataType": {
"type": "string"
},
"sources": { "sources": {
"type": "null" "type": "null"
}, },
@ -419,6 +439,7 @@
"licenses", "licenses",
"manifest", "manifest",
"metadata", "metadata",
"metadataType",
"sources", "sources",
"type", "type",
"version" "version"
@ -427,6 +448,9 @@
} }
] ]
}, },
"platform": {
"type": "string"
},
"pomProperties": { "pomProperties": {
"properties": { "properties": {
"Path": { "Path": {

View File

@ -164,6 +164,7 @@ func TestMultiplePackages(t *testing.T) {
Version: "0.7.2-r0", Version: "0.7.2-r0",
Licenses: []string{"BSD"}, Licenses: []string{"BSD"},
Type: pkg.ApkPkg, Type: pkg.ApkPkg,
MetadataType: pkg.ApkMetadataType,
Metadata: pkg.ApkMetadata{ Metadata: pkg.ApkMetadata{
Package: "libc-utils", Package: "libc-utils",
OriginPackage: "libc-dev", OriginPackage: "libc-dev",
@ -186,6 +187,7 @@ func TestMultiplePackages(t *testing.T) {
Version: "1.1.24-r2", Version: "1.1.24-r2",
Licenses: []string{"MIT", "BSD", "GPL2+"}, Licenses: []string{"MIT", "BSD", "GPL2+"},
Type: pkg.ApkPkg, Type: pkg.ApkPkg,
MetadataType: pkg.ApkMetadataType,
Metadata: pkg.ApkMetadata{ Metadata: pkg.ApkMetadata{
Package: "musl-utils", Package: "musl-utils",
OriginPackage: "musl", OriginPackage: "musl",

View File

@ -10,25 +10,25 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
type testResolver struct { type testResolverMock struct {
contents map[file.Reference]string contents map[file.Reference]string
} }
func newTestResolver() *testResolver { func newTestResolver() *testResolverMock {
return &testResolver{ return &testResolverMock{
contents: make(map[file.Reference]string), contents: make(map[file.Reference]string),
} }
} }
func (r *testResolver) FileContentsByRef(_ file.Reference) (string, error) { func (r *testResolverMock) FileContentsByRef(_ file.Reference) (string, error) {
return "", fmt.Errorf("not implemented") return "", fmt.Errorf("not implemented")
} }
func (r *testResolver) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { func (r *testResolverMock) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) {
return r.contents, nil return r.contents, nil
} }
func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) { func (r *testResolverMock) FilesByPath(paths ...file.Path) ([]file.Reference, error) {
results := make([]file.Reference, len(paths)) results := make([]file.Reference, len(paths))
for idx, p := range paths { for idx, p := range paths {
@ -39,13 +39,17 @@ func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error)
return results, nil return results, nil
} }
func (r *testResolver) FilesByGlob(_ ...string) ([]file.Reference, error) { func (r *testResolverMock) FilesByGlob(_ ...string) ([]file.Reference, error) {
path := "/a-path.txt" path := "/a-path.txt"
ref := file.NewFileReference(file.Path(path)) ref := file.NewFileReference(file.Path(path))
r.contents[ref] = fmt.Sprintf("%s file contents!", path) r.contents[ref] = fmt.Sprintf("%s file contents!", path)
return []file.Reference{ref}, nil return []file.Reference{ref}, nil
} }
// RelativeFileByPath is a stub: both arguments are ignored and a "not implemented" error is always returned.
func (r *testResolverMock) RelativeFileByPath(_ file.Reference, _ string) (*file.Reference, error) {
return nil, fmt.Errorf("not implemented")
}
func parser(_ string, reader io.Reader) ([]pkg.Package, error) { func parser(_ string, reader io.Reader) ([]pkg.Package, error) {
contents, err := ioutil.ReadAll(reader) contents, err := ioutil.ReadAll(reader)
if err != nil { if err != nil {

View File

@ -141,6 +141,7 @@ func TestParseJar(t *testing.T) {
Version: "1.0-SNAPSHOT", Version: "1.0-SNAPSHOT",
Language: pkg.Java, Language: pkg.Java,
Type: pkg.JenkinsPluginPkg, Type: pkg.JenkinsPluginPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{ Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{ Manifest: &pkg.JavaManifest{
ManifestVersion: "1.0", ManifestVersion: "1.0",
@ -185,6 +186,7 @@ func TestParseJar(t *testing.T) {
Version: "0.1.0", Version: "0.1.0",
Language: pkg.Java, Language: pkg.Java,
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{ Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{ Manifest: &pkg.JavaManifest{
ManifestVersion: "1.0", ManifestVersion: "1.0",
@ -204,6 +206,7 @@ func TestParseJar(t *testing.T) {
Version: "0.1.0", Version: "0.1.0",
Language: pkg.Java, Language: pkg.Java,
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{ Metadata: pkg.JavaMetadata{
Manifest: &pkg.JavaManifest{ Manifest: &pkg.JavaManifest{
ManifestVersion: "1.0", ManifestVersion: "1.0",
@ -228,6 +231,7 @@ func TestParseJar(t *testing.T) {
Version: "2.9.2", Version: "2.9.2",
Language: pkg.Java, Language: pkg.Java,
Type: pkg.JavaPkg, Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{ Metadata: pkg.JavaMetadata{
PomProperties: &pkg.PomProperties{ PomProperties: &pkg.PomProperties{
Path: "META-INF/maven/joda-time/joda-time/pom.properties", Path: "META-INF/maven/joda-time/joda-time/pom.properties",

View File

@ -43,8 +43,8 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
Licenses: []string{p.License}, Licenses: []string{p.License},
Language: pkg.JavaScript, Language: pkg.JavaScript,
Type: pkg.NpmPkg, Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJsonMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJsonMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Author: p.Author, Author: p.Author,
Homepage: p.Homepage, Homepage: p.Homepage,
}, },

View File

@ -15,7 +15,8 @@ func TestParsePackageJSON(t *testing.T) {
Type: pkg.NpmPkg, Type: pkg.NpmPkg,
Licenses: []string{"Artistic-2.0"}, Licenses: []string{"Artistic-2.0"},
Language: pkg.JavaScript, Language: pkg.JavaScript,
Metadata: pkg.NpmPackageJsonMetadata{ MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
}, },

View File

@ -3,6 +3,7 @@ package python
import ( import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"strings"
"github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/file"
@ -11,19 +12,15 @@ import (
"github.com/anchore/syft/syft/scope" "github.com/anchore/syft/syft/scope"
) )
const wheelGlob = "**/*dist-info/METADATA" const (
eggMetadataGlob = "**/*egg-info/PKG-INFO"
wheelMetadataGlob = "**/*dist-info/METADATA"
)
type PackageCataloger struct { type PackageCataloger struct{}
globs []string
}
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. // NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
func NewPythonPackageCataloger() *PackageCataloger { func NewPythonPackageCataloger() *PackageCataloger {
//globParsers := map[string]common.ParserFn{
// "**/*egg-info/PKG-INFO": parseWheelOrEggMetadata,
// "**/*dist-info/METADATA": parseWheelOrEggMetadata,
//}
return &PackageCataloger{} return &PackageCataloger{}
} }
@ -32,33 +29,88 @@ func (c *PackageCataloger) Name() string {
} }
func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) {
return c.catalogWheels(resolver) // nolint:prealloc
} var fileMatches []file.Reference
func (c *PackageCataloger) catalogWheels(resolver scope.Resolver) ([]pkg.Package, error) { for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} {
fileMatches, err := resolver.FilesByGlob(wheelGlob) matches, err := resolver.FilesByGlob(glob)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to find files by glob: %s", wheelGlob) return nil, fmt.Errorf("failed to find files by glob: %s", glob)
}
fileMatches = append(fileMatches, matches...)
} }
var pkgs []pkg.Package var pkgs []pkg.Package
for _, ref := range fileMatches { for _, ref := range fileMatches {
p, err := c.catalogWheel(resolver, ref) p, err := c.catalogEggOrWheel(resolver, ref)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to catalog python wheel=%+v: %w", ref.Path, err) return nil, fmt.Errorf("unable to catalog python package=%+v: %w", ref.Path, err)
}
if p != nil {
pkgs = append(pkgs, *p)
} }
pkgs = append(pkgs, p)
} }
return pkgs, nil return pkgs, nil
} }
func (c *PackageCataloger) catalogWheel(resolver scope.Resolver, wheelRef file.Reference) (pkg.Package, error) { func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
var sources = []file.Reference{metadataRef}
metadataContents, err := resolver.FileContentsByRef(metadataRef)
if err != nil {
return nil, err
}
metadata, err := parseWheelOrEggMetadata(strings.NewReader(metadataContents))
if err != nil {
return nil, err
}
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important // or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer). // to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
recordPath := filepath.Join(filepath.Dir(string(wheelRef.Path)), "RECORD")
// problem! we don't know which is the right discovered path relative to the given METADATA file! (which layer?)
discoveredPaths, err := resolver.FilesByPath(file.Path(recordPath))
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
recordPath := filepath.Join(filepath.Dir(string(metadataRef.Path)), "RECORD")
recordRef, err := resolver.RelativeFileByPath(metadataRef, recordPath)
if err != nil {
return nil, err
}
if recordRef != nil {
sources = append(sources, *recordRef)
recordContents, err := resolver.FileContentsByRef(*recordRef)
if err != nil {
return nil, err
}
// parse the record contents
records, err := parseWheelOrEggRecord(strings.NewReader(recordContents))
if err != nil {
return nil, err
}
// append the record files list to the metadata
metadata.Files = records
}
// assemble the package
var licenses []string
if metadata.License != "" {
licenses = []string{metadata.License}
}
return &pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
FoundBy: c.Name(),
Source: sources,
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: metadata,
}, nil
} }

View File

@ -1,45 +1,155 @@
package python package python
import ( import (
"fmt"
"io"
"io/ioutil"
"os" "os"
"testing" "testing"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep" "github.com/go-test/deep"
) )
func TestPythonPackageCataloger(t *testing.T) { type pythonTestResolverMock struct {
metadataReader io.Reader
recordReader io.Reader
metadataRef *file.Reference
recordRef *file.Reference
contents map[file.Reference]string
}
// newTestResolver builds a resolver mock that serves the given readers as the contents of a fixed
// "metadata-path" reference and, only when recordReader is non-nil, a fixed "record-path" reference.
// With a nil recordReader the mock's recordRef stays nil.
func newTestResolver(recordReader, metadataReader io.Reader) *pythonTestResolverMock {
	mock := &pythonTestResolverMock{
		recordReader:   recordReader,
		metadataReader: metadataReader,
		contents:       make(map[file.Reference]string),
	}

	// the record reference is created first (when applicable), then the metadata reference
	if recordReader != nil {
		recordReference := file.NewFileReference("record-path")
		mock.recordRef = &recordReference
	}

	metadataReference := file.NewFileReference("metadata-path")
	mock.metadataRef = &metadataReference

	return mock
}
// FileContentsByRef returns everything readable from the metadata reader or the record reader, chosen by
// comparing the path of the given reference against the mock's metadata and record references. A reference
// that matches neither (including any non-metadata reference when the mock has no record reference) yields
// an "invalid value given" error.
func (r *pythonTestResolverMock) FileContentsByRef(ref file.Reference) (string, error) {
	var reader io.Reader

	switch {
	case r.metadataRef != nil && ref.Path == r.metadataRef.Path:
		reader = r.metadataReader
	case r.recordRef != nil && ref.Path == r.recordRef.Path:
		// recordRef is nil when no record fixture was supplied; the nil guard keeps an unknown
		// reference from panicking and lets it fall through to the error below instead
		reader = r.recordReader
	default:
		return "", fmt.Errorf("invalid value given")
	}

	b, err := ioutil.ReadAll(reader)
	if err != nil {
		return "", err
	}
	return string(b), nil
}
// MultipleFileContentsByRef is a stub: the references are ignored and a "not implemented" error is always returned.
func (r *pythonTestResolverMock) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) {
return nil, fmt.Errorf("not implemented")
}
// FilesByPath is a stub: the paths are ignored and a "not implemented" error is always returned.
func (r *pythonTestResolverMock) FilesByPath(_ ...file.Path) ([]file.Reference, error) {
return nil, fmt.Errorf("not implemented")
}
// FilesByGlob is a stub: the glob patterns are ignored and a "not implemented" error is always returned.
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]file.Reference, error) {
return nil, fmt.Errorf("not implemented")
}
// RelativeFileByPath hands back the mock's record reference (which may be nil) when the given reference has
// the same path as the mock's metadata reference; the requested path string is ignored. Any other reference
// is rejected with an "invalid value given" error.
func (r *pythonTestResolverMock) RelativeFileByPath(reference file.Reference, _ string) (*file.Reference, error) {
	if reference.Path != r.metadataRef.Path {
		return nil, fmt.Errorf("invalid value given")
	}
	return r.recordRef, nil
}
func TestPythonPackageWheelCataloger(t *testing.T) {
tests := []struct { tests := []struct {
Fixture string MetadataFixture string
ExpectedMetadata []pkg.Package RecordFixture string
ExpectedPackage pkg.Package
}{ }{
{ {
Fixture: "test-fixtures/", MetadataFixture: "test-fixtures/egg-info/PKG-INFO",
ExpectedMetadata: []pkg.Package{ RecordFixture: "test-fixtures/egg-info/RECORD",
{ ExpectedPackage: pkg.Package{
Name: "requests", Name: "requests",
Version: "2.22.0", Version: "2.22.0",
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Language: pkg.Python, Language: pkg.Python,
Licenses: []string{"Apache 2.0"}, Licenses: []string{"Apache 2.0"},
MetadataType: pkg.PythonEggWheelMetadataType, FoundBy: "python-package-cataloger",
Metadata: pkg.EggWheelMetadata{ MetadataType: pkg.PythonPackageMetadataType,
Metadata: pkg.PythonPackageMetadata{
Name: "requests", Name: "requests",
Version: "2.22.0", Version: "2.22.0",
License: "Apache 2.0", License: "Apache 2.0",
Platform: "UNKNOWN", Platform: "UNKNOWN",
Author: "Kenneth Reitz", Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org", AuthorEmail: "me@kennethreitz.org",
Files: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
{Path: "requests/__pycache__/__version__.cpython-38.pyc"},
{Path: "requests/__pycache__/utils.cpython-38.pyc"},
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
},
}, },
}, },
{ {
MetadataFixture: "test-fixtures/dist-info/METADATA",
RecordFixture: "test-fixtures/dist-info/RECORD",
ExpectedPackage: pkg.Package{
Name: "Pygments", Name: "Pygments",
Version: "2.6.1", Version: "2.6.1",
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Language: pkg.Python, Language: pkg.Python,
Licenses: []string{"BSD License"}, Licenses: []string{"BSD License"},
MetadataType: pkg.PythonEggWheelMetadataType, FoundBy: "python-package-cataloger",
Metadata: pkg.EggWheelMetadata{ MetadataType: pkg.PythonPackageMetadataType,
Metadata: pkg.PythonPackageMetadata{
Name: "Pygments",
Version: "2.6.1",
License: "BSD License",
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
Files: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
{Path: "Pygments-2.6.1.dist-info/RECORD"},
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
},
},
},
},
{
// in cases where the metadata file is available and the record is not, we should still record that there is a package
MetadataFixture: "test-fixtures/partial.dist-info/METADATA",
ExpectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
FoundBy: "python-package-cataloger",
MetadataType: pkg.PythonPackageMetadataType,
Metadata: pkg.PythonPackageMetadata{
Name: "Pygments", Name: "Pygments",
Version: "2.6.1", Version: "2.6.1",
License: "BSD License", License: "BSD License",
@ -49,22 +159,39 @@ func TestPythonPackageCataloger(t *testing.T) {
}, },
}, },
}, },
},
} }
for _, test := range tests { for _, test := range tests {
t.Run(test.Fixture, func(t *testing.T) { t.Run(test.MetadataFixture, func(t *testing.T) {
fixture, err := os.Open(test.Fixture) metadata, err := os.Open(test.MetadataFixture)
if err != nil { if err != nil {
t.Fatalf("failed to open fixture: %+v", err) t.Fatalf("failed to open record: %+v", err)
} }
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture) var record io.Reader
if test.RecordFixture != "" {
record, err = os.Open(test.RecordFixture)
if err != nil { if err != nil {
t.Fatalf("failed to parse python package: %+v", err) t.Fatalf("failed to open record: %+v", err)
}
} }
for _, d := range deep.Equal(actual, &test.ExpectedMetadata) { resolver := newTestResolver(record, metadata)
// note that the source is the record ref created by the resolver mock... attach the expected values
test.ExpectedPackage.Source = []file.Reference{*resolver.metadataRef}
if resolver.recordRef != nil {
test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef)
}
pyPkgCataloger := NewPythonPackageCataloger()
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)
if err != nil {
t.Fatalf("failed to catalog python package: %+v", err)
}
for _, d := range deep.Equal(actual, &test.ExpectedPackage) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }
}) })

View File

@ -37,14 +37,12 @@ func TestParseRequirementsTxt(t *testing.T) {
Version: "1.0.0", Version: "1.0.0",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
"flask": { "flask": {
Name: "flask", Name: "flask",
Version: "4.0.0", Version: "4.0.0",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
} }
fixture, err := os.Open("test-fixtures/requires/requirements.txt") fixture, err := os.Open("test-fixtures/requires/requirements.txt")

View File

@ -14,35 +14,30 @@ func TestParseSetup(t *testing.T) {
Version: "2.2.0", Version: "2.2.0",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
"mypy": { "mypy": {
Name: "mypy", Name: "mypy",
Version: "v0.770", Version: "v0.770",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
"mypy1": { "mypy1": {
Name: "mypy1", Name: "mypy1",
Version: "v0.770", Version: "v0.770",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
"mypy2": { "mypy2": {
Name: "mypy2", Name: "mypy2",
Version: "v0.770", Version: "v0.770",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
"mypy3": { "mypy3": {
Name: "mypy3", Name: "mypy3",
Version: "v0.770", Version: "v0.770",
Language: pkg.Python, Language: pkg.Python,
Type: pkg.PythonPkg, Type: pkg.PythonPkg,
Licenses: []string{},
}, },
} }
fixture, err := os.Open("test-fixtures/setup/setup.py") fixture, err := os.Open("test-fixtures/setup/setup.py")

View File

@ -13,7 +13,7 @@ import (
// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), // parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes),
// returning all Python packages listed. // returning all Python packages listed.
func parseWheelOrEggMetadata(_ string, reader io.Reader) (*pkg.EggWheelMetadata, error) { func parseWheelOrEggMetadata(reader io.Reader) (pkg.PythonPackageMetadata, error) {
fields := make(map[string]string) fields := make(map[string]string)
var key string var key string
@ -35,12 +35,12 @@ func parseWheelOrEggMetadata(_ string, reader io.Reader) (*pkg.EggWheelMetadata,
case strings.HasPrefix(line, " "): case strings.HasPrefix(line, " "):
// a field-body continuation // a field-body continuation
if len(key) == 0 { if len(key) == 0 {
return nil, fmt.Errorf("no match for continuation: line: '%s'", line) return pkg.PythonPackageMetadata{}, fmt.Errorf("no match for continuation: line: '%s'", line)
} }
val, ok := fields[key] val, ok := fields[key]
if !ok { if !ok {
return nil, fmt.Errorf("no previous key exists, expecting: %s", key) return pkg.PythonPackageMetadata{}, fmt.Errorf("no previous key exists, expecting: %s", key)
} }
// concatenate onto previous value // concatenate onto previous value
val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
@ -48,25 +48,25 @@ func parseWheelOrEggMetadata(_ string, reader io.Reader) (*pkg.EggWheelMetadata,
default: default:
// parse a new key (note, duplicate keys are overridden) // parse a new key (note, duplicate keys are overridden)
if i := strings.Index(line, ":"); i > 0 { if i := strings.Index(line, ":"); i > 0 {
key = strings.TrimSpace(line[0:i]) // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field
key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "")
val := strings.TrimSpace(line[i+1:]) val := strings.TrimSpace(line[i+1:])
fields[key] = val fields[key] = val
} else { } else {
return nil, fmt.Errorf("cannot parse field from line: '%s'", line) return pkg.PythonPackageMetadata{}, fmt.Errorf("cannot parse field from line: '%s'", line)
} }
} }
} }
if err := scanner.Err(); err != nil { if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to parse python wheel/egg: %w", err) return pkg.PythonPackageMetadata{}, fmt.Errorf("failed to parse python wheel/egg: %w", err)
} }
var metadata pkg.EggWheelMetadata var metadata pkg.PythonPackageMetadata
if err := mapstructure.Decode(fields, &metadata); err != nil { if err := mapstructure.Decode(fields, &metadata); err != nil {
return nil, fmt.Errorf("unable to parse APK metadata: %w", err) return pkg.PythonPackageMetadata{}, fmt.Errorf("unable to parse APK metadata: %w", err)
} }
return &metadata, nil return metadata, nil
} }

View File

@ -8,14 +8,14 @@ import (
"github.com/go-test/deep" "github.com/go-test/deep"
) )
func TestParseEggMetadata(t *testing.T) { func TestParseWheelEggMetadata(t *testing.T) {
tests := []struct { tests := []struct {
Fixture string Fixture string
ExpectedMetadata pkg.EggWheelMetadata ExpectedMetadata pkg.PythonPackageMetadata
}{ }{
{ {
Fixture: "test-fixtures/egg-info/PKG-INFO", Fixture: "test-fixtures/egg-info/PKG-INFO",
ExpectedMetadata: pkg.EggWheelMetadata{ ExpectedMetadata: pkg.PythonPackageMetadata{
Name: "requests", Name: "requests",
Version: "2.22.0", Version: "2.22.0",
License: "Apache 2.0", License: "Apache 2.0",
@ -26,7 +26,7 @@ func TestParseEggMetadata(t *testing.T) {
}, },
{ {
Fixture: "test-fixtures/dist-info/METADATA", Fixture: "test-fixtures/dist-info/METADATA",
ExpectedMetadata: pkg.EggWheelMetadata{ ExpectedMetadata: pkg.PythonPackageMetadata{
Name: "Pygments", Name: "Pygments",
Version: "2.6.1", Version: "2.6.1",
License: "BSD License", License: "BSD License",
@ -44,12 +44,12 @@ func TestParseEggMetadata(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err) t.Fatalf("failed to open fixture: %+v", err)
} }
actual, err := parseWheelOrEggMetadata(fixture.Name(), fixture) actual, err := parseWheelOrEggMetadata(fixture)
if err != nil { if err != nil {
t.Fatalf("failed to parse egg-info: %+v", err) t.Fatalf("failed to parse: %+v", err)
} }
for _, d := range deep.Equal(actual, &test.ExpectedMetadata) { for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }
}) })

View File

@ -0,0 +1,60 @@
package python
import (
"encoding/csv"
"fmt"
"io"
"strings"
"github.com/anchore/syft/syft/pkg"
)
// parseWheelOrEggRecord reads a Python Egg or Wheel RECORD file, which is CSV with exactly three columns per
// row (path, digest, size), and returns one file record per row in the order read.
//
// The digest column is either empty or of the form "<algorithm>=<value>"; an empty digest leaves the record's
// Digest at its zero value. The size column is stored as-is (it may be empty). An error is returned if the CSV
// cannot be read, a row does not have exactly three columns, or a non-empty digest has no "=" separator.
func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) {
	var records []pkg.PythonFileRecord
	r := csv.NewReader(reader)

	for {
		recordList, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("unable to read python record file: %w", err)
		}

		if len(recordList) != 3 {
			return nil, fmt.Errorf("python record has an unexpected length=%d: %q", len(recordList), recordList)
		}

		record := pkg.PythonFileRecord{
			Path: recordList[0],
			Size: recordList[2],
		}

		if digest := recordList[1]; digest != "" {
			// split on the first "=" only, so a digest value that itself contains "=" (e.g. base64
			// padding) is kept whole instead of being rejected
			fields := strings.SplitN(digest, "=", 2)
			if len(fields) != 2 {
				return nil, fmt.Errorf("unexpected python record digest: %q", digest)
			}
			record.Digest = pkg.Digest{
				Algorithm: fields[0],
				Value:     fields[1],
			}
		}

		records = append(records, record)
	}

	return records, nil
}

View File

@ -0,0 +1,57 @@
package python
import (
"os"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
// TestParseWheelEggRecord parses each RECORD fixture with parseWheelOrEggRecord and compares the result
// against the expected list of file records, reporting every difference found.
func TestParseWheelEggRecord(t *testing.T) {
	tests := []struct {
		Fixture          string
		ExpectedMetadata []pkg.PythonFileRecord
	}{
		{
			Fixture: "test-fixtures/egg-info/RECORD",
			ExpectedMetadata: []pkg.PythonFileRecord{
				{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{Algorithm: "sha256", Value: "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
				{Path: "requests/__init__.py", Digest: pkg.Digest{Algorithm: "sha256", Value: "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
				{Path: "requests/__pycache__/__version__.cpython-38.pyc"},
				{Path: "requests/__pycache__/utils.cpython-38.pyc"},
				{Path: "requests/__version__.py", Digest: pkg.Digest{Algorithm: "sha256", Value: "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
				{Path: "requests/utils.py", Digest: pkg.Digest{Algorithm: "sha256", Value: "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
			},
		},
		{
			Fixture: "test-fixtures/dist-info/RECORD",
			ExpectedMetadata: []pkg.PythonFileRecord{
				{Path: "../../../bin/pygmentize", Digest: pkg.Digest{Algorithm: "sha256", Value: "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
				{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{Algorithm: "sha256", Value: "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
				{Path: "Pygments-2.6.1.dist-info/RECORD"},
				{Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
				{Path: "pygments/util.py", Digest: pkg.Digest{Algorithm: "sha256", Value: "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.Fixture, func(t *testing.T) {
			fixture, err := os.Open(test.Fixture)
			if err != nil {
				t.Fatalf("failed to open fixture: %+v", err)
			}
			// release the file handle once this sub-test finishes
			defer fixture.Close()

			actual, err := parseWheelOrEggRecord(fixture)
			if err != nil {
				t.Fatalf("failed to parse: %+v", err)
			}

			for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
				t.Errorf("diff: %+v", d)
			}
		})
	}
}

View File

@ -0,0 +1,5 @@
../../../bin/pygmentize,sha256=dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8,220
Pygments-2.6.1.dist-info/AUTHORS,sha256=PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY,8449
Pygments-2.6.1.dist-info/RECORD,,
pygments/__pycache__/__init__.cpython-38.pyc,,
pygments/util.py,sha256=586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA,10778

View File

@ -0,0 +1,6 @@
requests-2.22.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
requests/__init__.py,sha256=PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA,3921
requests/__pycache__/__version__.cpython-38.pyc,,
requests/__pycache__/utils.cpython-38.pyc,,
requests/__version__.py,sha256=Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc,436
requests/utils.py,sha256=LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A,30049

View File

@ -0,0 +1,47 @@
Metadata-Version: 2.1
Name: Pygments
Version: 2.6.1
Summary: Pygments is a syntax highlighting package written in Python.
Home-page: https://pygments.org/
Author: Georg Brandl
Author-email: georg@python.org
License: BSD License
Keywords: syntax highlighting
Platform: any
Classifier: License :: OSI Approved :: BSD License
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: Intended Audience :: System Administrators
Classifier: Development Status :: 6 - Mature
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Operating System :: OS Independent
Classifier: Topic :: Text Processing :: Filters
Classifier: Topic :: Utilities
Requires-Python: >=3.5
Pygments
~~~~~~~~
Pygments is a syntax highlighting package written in Python.
It is a generic syntax highlighter suitable for use in code hosting, forums,
wikis or other applications that need to prettify source code. Highlights
are:
* a wide range of over 500 languages and other text formats is supported
* special attention is paid to details, increasing quality by a fair amount
* support for new languages and formats are added easily
* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image formats that PIL supports and ANSI sequences
* it is usable as a command-line tool and as a library
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.

View File

@ -14,6 +14,7 @@ func TestParseRpmDB(t *testing.T) {
Name: "dive", Name: "dive",
Version: "0.9.2-1", Version: "0.9.2-1",
Type: pkg.RpmPkg, Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType,
Metadata: pkg.RpmdbMetadata{ Metadata: pkg.RpmdbMetadata{
Name: "dive", Name: "dive",
Epoch: 0, Epoch: 0,

View File

@ -15,6 +15,7 @@ func TestParseGemspec(t *testing.T) {
Type: pkg.GemPkg, Type: pkg.GemPkg,
Licenses: []string{"MIT"}, Licenses: []string{"MIT"},
Language: pkg.Ruby, Language: pkg.Ruby,
MetadataType: pkg.GemMetadataType,
Metadata: pkg.GemMetadata{ Metadata: pkg.GemMetadata{
Name: "bundler", Name: "bundler",
Version: "2.1.4", Version: "2.1.4",

View File

@ -1,11 +0,0 @@
package pkg
// EggWheelMetadata represents all captured data for a python egg or wheel package.
// Each field carries a `json` tag giving its serialized name and a second tag keyed `mapstruct`.
// NOTE(review): the second tag key is spelled `mapstruct`; if the decoder in use is
// github.com/mitchellh/mapstructure it looks up the key `mapstructure`, in which case these
// tags are never consulted — confirm against the code that decodes into this struct.
type EggWheelMetadata struct {
Name string `json:"name" mapstruct:"Name"`
Version string `json:"version" mapstruct:"Version"`
License string `json:"license" mapstruct:"License"`
Author string `json:"author" mapstruct:"Author"`
// NOTE(review): this is the only tag value containing a dash ("Author-email") — verify the
// producer of the decoded map emits the key in exactly this form.
AuthorEmail string `json:"authorEmail" mapstruct:"Author-email"`
Platform string `json:"platform" mapstruct:"Platform"`
}

View File

@ -8,7 +8,7 @@ const (
DpkgMetadataType MetadataType = "dpkg-metadata" DpkgMetadataType MetadataType = "dpkg-metadata"
GemMetadataType MetadataType = "gem-metadata" GemMetadataType MetadataType = "gem-metadata"
JavaMetadataType MetadataType = "java-metadata" JavaMetadataType MetadataType = "java-metadata"
NpmPackageJsonMetadataType MetadataType = "npm-package-json-metadata" NpmPackageJSONMetadataType MetadataType = "npm-package-json-metadata"
RpmdbMetadataType MetadataType = "rpmdb-metadata" RpmdbMetadataType MetadataType = "rpmdb-metadata"
PythonEggWheelMetadataType MetadataType = "python-egg-wheel-metadata" PythonPackageMetadataType MetadataType = "python-package-metadata"
) )

View File

@ -1,7 +1,7 @@
package pkg package pkg
// NpmPackageJsonMetadata holds extra information that is used in pkg.Package // NpmPackageJSONMetadata holds extra information that is used in pkg.Package
type NpmPackageJsonMetadata struct { type NpmPackageJSONMetadata struct {
Name string `mapstructure:"name" json:"name"` Name string `mapstructure:"name" json:"name"`
Version string `mapstructure:"version" json:"version"` Version string `mapstructure:"version" json:"version"`
Files []string `mapstructure:"files" json:"files"` Files []string `mapstructure:"files" json:"files"`

View File

@ -0,0 +1,23 @@
package pkg
// Digest pairs a checksum value with the name of the algorithm that produced it.
type Digest struct {
	// Algorithm names the hashing algorithm (presumably values such as "sha256" — confirm against the parser).
	Algorithm string `json:"algorithm"`
	// Value is the checksum itself, kept as text.
	Value string `json:"value"`
}

// PythonFileRecord describes a single file that belongs to a python package: where it lives,
// its digest, and its size. The size is kept as a string rather than a number.
type PythonFileRecord struct {
	Path   string `json:"path"`
	Digest Digest `json:"digest"`
	Size   string `json:"size"`
}

// PythonPackageMetadata represents all captured data for a python egg or wheel package.
//
// The decoding tags use the key `mapstructure`, which is the key the mapstructure decoder
// reads (and the key NpmPackageJSONMetadata already uses); a tag keyed `mapstruct` is ignored
// by that decoder. The tag values are unchanged, and the JSON names are untouched.
type PythonPackageMetadata struct {
	Name    string `json:"name" mapstructure:"Name"`
	Version string `json:"version" mapstructure:"Version"`
	License string `json:"license" mapstructure:"License"`
	Author  string `json:"author" mapstructure:"Author"`
	// AuthorEmail is decoded from the dash-less key "Authoremail".
	AuthorEmail string `json:"authorEmail" mapstructure:"Authoremail"`
	Platform    string `json:"platform" mapstructure:"Platform"`
	// Files lists the files recorded for the package; it is omitted from JSON when empty.
	Files []PythonFileRecord `json:"files,omitempty"`
}

View File

@ -23,8 +23,13 @@ type ContentResolver interface {
// FileResolver knows how to get file.References for given string paths and globs // FileResolver knows how to get file.References for given string paths and globs
type FileResolver interface { type FileResolver interface {
// FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches)
FilesByPath(paths ...file.Path) ([]file.Reference, error) FilesByPath(paths ...file.Path) ([]file.Reference, error)
// FilesByGlob fetches a set of file references which the given glob matches
FilesByGlob(patterns ...string) ([]file.Reference, error) FilesByGlob(patterns ...string) ([]file.Reference, error)
// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer as, or a lower layer than, another file.
RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error)
} }
// getImageResolver returns the appropriate resolve for a container image given the scope option // getImageResolver returns the appropriate resolve for a container image given the scope option

View File

@ -109,6 +109,15 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]file.Reference, e
return uniqueFiles, nil return uniqueFiles, nil
} }
// RelativeFileByPath looks up the catalog entry for the given reference and then resolves the
// given path against the squashed tree of that entry's source. Any error from the catalog
// lookup is returned as-is; otherwise the result of the tree lookup is returned with a nil error.
func (r *AllLayersResolver) RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error) {
	target := file.Path(path)

	entry, err := r.img.FileCatalog.Get(reference)
	if err == nil {
		return entry.Source.SquashedTree.File(target), nil
	}
	return nil, err
}
// MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a // MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a
// file.Reference is a path relative to a particular layer. // file.Reference is a path relative to a particular layer.
func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {

View File

@ -75,6 +75,18 @@ func (s DirectoryResolver) FilesByGlob(patterns ...string) ([]file.Reference, er
return result, nil return result, nil
} }
// RelativeFileByPath fetches a single file reference for the given path. The reference argument
// is ignored; the lookup is delegated to FilesByPath. An error from FilesByPath is returned
// as-is. When no file matches, a nil reference and a nil error are returned; when several
// match, the first one is returned.
//
// A value receiver is used so that this method agrees with the other DirectoryResolver methods
// (FilesByGlob, MultipleFileContentsByRef, FileContentsByRef); the method remains callable
// through a *DirectoryResolver as well.
func (s DirectoryResolver) RelativeFileByPath(_ file.Reference, path string) (*file.Reference, error) {
	paths, err := s.FilesByPath(file.Path(path))
	if err != nil {
		return nil, err
	}
	if len(paths) == 0 {
		// not finding the file is not an error; callers must check for a nil reference
		return nil, nil
	}

	return &paths[0], nil
}
// MultipleFileContentsByRef returns the file contents for all file.References relative a directory. // MultipleFileContentsByRef returns the file contents for all file.References relative a directory.
func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {
refContents := make(map[file.Reference]string) refContents := make(map[file.Reference]string)
@ -91,10 +103,10 @@ func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[f
// FileContentsByRef fetches file contents for a single file reference relative to a directory. // FileContentsByRef fetches file contents for a single file reference relative to a directory.
// If the path does not exist an error is returned. // If the path does not exist an error is returned.
func (s DirectoryResolver) FileContentsByRef(ref file.Reference) (string, error) { func (s DirectoryResolver) FileContentsByRef(reference file.Reference) (string, error) {
contents, err := fileContents(ref.Path) contents, err := fileContents(reference.Path)
if err != nil { if err != nil {
return "", fmt.Errorf("could not read contents of file: %s", ref.Path) return "", fmt.Errorf("could not read contents of file: %s", reference.Path)
} }
return string(contents), nil return string(contents), nil

View File

@ -73,6 +73,18 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]file.Reference,
return uniqueFiles, nil return uniqueFiles, nil
} }
// RelativeFileByPath fetches a single file reference for the given path. The reference argument
// is not consulted; the lookup is delegated to FilesByPath. An error from FilesByPath is
// returned as-is, no match yields a nil reference with a nil error, and otherwise the first
// match is returned.
func (r *ImageSquashResolver) RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error) {
	matches, err := r.FilesByPath(file.Path(path))
	switch {
	case err != nil:
		return nil, err
	case len(matches) == 0:
		return nil, nil
	}
	return &matches[0], nil
}
// MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a // MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a
// file.Reference is a path relative to a particular layer, in this case only from the squashed representation. // file.Reference is a path relative to a particular layer, in this case only from the squashed representation.
func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) {

View File

@ -68,6 +68,7 @@ func TestPkgCoverageImage(t *testing.T) {
} }
if pkgCount != len(c.pkgInfo) { if pkgCount != len(c.pkgInfo) {
t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range catalog.Enumerate(c.pkgType) { for a := range catalog.Enumerate(c.pkgType) {
t.Log(" ", a) t.Log(" ", a)
} }

View File

@ -0,0 +1,5 @@
../../../bin/pygmentize,sha256=dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8,220
Pygments-2.6.1.dist-info/AUTHORS,sha256=PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY,8449
Pygments-2.6.1.dist-info/RECORD,,
pygments/__pycache__/__init__.cpython-38.pyc,,
pygments/util.py,sha256=586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA,10778