add top_level.txt processing to python package cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-22 12:39:03 -04:00
parent 1414d1fbc3
commit 2e5ff4a995
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
15 changed files with 220 additions and 96 deletions

View File

@ -1,6 +1,7 @@
package python
import (
"bufio"
"fmt"
"path/filepath"
"strings"
@ -53,17 +54,17 @@ func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, erro
return pkgs, nil
}
func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) {
var sources = []file.Reference{metadataRef}
metadataContents, err := resolver.FileContentsByRef(metadataRef)
if err != nil {
return nil, err
return nil, nil, err
}
metadata, err := parseWheelOrEggMetadata(strings.NewReader(metadataContents))
metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents))
if err != nil {
return nil, err
return nil, nil, err
}
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
@ -74,7 +75,7 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRe
recordPath := filepath.Join(filepath.Dir(string(metadataRef.Path)), "RECORD")
recordRef, err := resolver.RelativeFileByPath(metadataRef, recordPath)
if err != nil {
return nil, err
return nil, nil, err
}
if recordRef != nil {
@ -82,20 +83,56 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRe
recordContents, err := resolver.FileContentsByRef(*recordRef)
if err != nil {
return nil, err
return nil, nil, err
}
// parse the record contents
records, err := parseWheelOrEggRecord(strings.NewReader(recordContents))
if err != nil {
return nil, err
return nil, nil, err
}
// append the record files list to the metadata
metadata.Files = records
}
// assemble the package
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(string(metadataRef.Path))
topLevelPath := filepath.Join(parentDir, "top_level.txt")
topLevelRef, err := resolver.RelativeFileByPath(metadataRef, topLevelPath)
if err != nil {
return nil, nil, err
}
if topLevelRef == nil {
return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path))
}
topLevelContents, err := resolver.FileContentsByRef(*topLevelRef)
if err != nil {
return nil, nil, err
}
// nolint:prealloc
var topLevelPackages []string
scanner := bufio.NewScanner(strings.NewReader(topLevelContents))
for scanner.Scan() {
topLevelPackages = append(topLevelPackages, scanner.Text())
}
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err)
}
metadata.TopLevelPackages = topLevelPackages
return &metadata, sources, nil
}
func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef)
if err != nil {
return nil, err
}
var licenses []string
if metadata.License != "" {
@ -111,6 +148,6 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRe
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: metadata,
Metadata: *metadata,
}, nil
}

View File

@ -5,6 +5,7 @@ import (
"io"
"io/ioutil"
"os"
"strings"
"testing"
"github.com/anchore/stereoscope/pkg/file"
@ -16,29 +17,65 @@ import (
type pythonTestResolverMock struct {
metadataReader io.Reader
recordReader io.Reader
topLevelReader io.Reader
metadataRef *file.Reference
recordRef *file.Reference
topLevelRef *file.Reference
contents map[file.Reference]string
}
func newTestResolver(recordReader, metadataReader io.Reader) *pythonTestResolverMock {
func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMock {
metadataReader, err := os.Open(metaPath)
if err != nil {
panic(fmt.Errorf("failed to open metadata: %+v", err))
}
var recordReader io.Reader
if recordPath != "" {
recordReader, err = os.Open(recordPath)
if err != nil {
panic(fmt.Errorf("failed to open record: %+v", err))
}
}
var topLevelReader io.Reader
if topPath != "" {
topLevelReader, err = os.Open(topPath)
if err != nil {
panic(fmt.Errorf("failed to open top level: %+v", err))
}
}
var recordRef *file.Reference
if recordReader != nil {
ref := file.NewFileReference("record-path")
ref := file.NewFileReference("test-fixtures/dist-info/RECORD")
recordRef = &ref
}
metadataRef := file.NewFileReference("metadata-path")
var topLevelRef *file.Reference
if topLevelReader != nil {
ref := file.NewFileReference("test-fixtures/dist-info/top_level.txt")
topLevelRef = &ref
}
metadataRef := file.NewFileReference("test-fixtures/dist-info/METADATA")
return &pythonTestResolverMock{
recordReader: recordReader,
metadataReader: metadataReader,
topLevelReader: topLevelReader,
metadataRef: &metadataRef,
recordRef: recordRef,
topLevelRef: topLevelRef,
contents: make(map[file.Reference]string),
}
}
func (r *pythonTestResolverMock) FileContentsByRef(ref file.Reference) (string, error) {
switch ref.Path {
case r.topLevelRef.Path:
b, err := ioutil.ReadAll(r.topLevelReader)
if err != nil {
return "", err
}
return string(b), nil
case r.metadataRef.Path:
b, err := ioutil.ReadAll(r.metadataReader)
if err != nil {
@ -66,12 +103,14 @@ func (r *pythonTestResolverMock) FilesByPath(_ ...file.Path) ([]file.Reference,
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]file.Reference, error) {
return nil, fmt.Errorf("not implemented")
}
func (r *pythonTestResolverMock) RelativeFileByPath(reference file.Reference, _ string) (*file.Reference, error) {
switch reference.Path {
case r.metadataRef.Path:
func (r *pythonTestResolverMock) RelativeFileByPath(_ file.Reference, path string) (*file.Reference, error) {
switch {
case strings.Contains(path, "RECORD"):
return r.recordRef, nil
case strings.Contains(path, "top_level.txt"):
return r.topLevelRef, nil
default:
return nil, fmt.Errorf("invalid value given")
return nil, fmt.Errorf("invalid RelativeFileByPath value given: %q", path)
}
}
@ -79,11 +118,13 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
tests := []struct {
MetadataFixture string
RecordFixture string
TopLevelFixture string
ExpectedPackage pkg.Package
}{
{
MetadataFixture: "test-fixtures/egg-info/PKG-INFO",
RecordFixture: "test-fixtures/egg-info/RECORD",
TopLevelFixture: "test-fixtures/egg-info/top_level.txt",
ExpectedPackage: pkg.Package{
Name: "requests",
Version: "2.22.0",
@ -99,6 +140,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
Platform: "UNKNOWN",
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{
{Path: "requests-2.22.0.dist-info/INSTALLER", Digest: pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"},
{Path: "requests/__init__.py", Digest: pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"},
@ -107,12 +149,14 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
{Path: "requests/__version__.py", Digest: pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"},
{Path: "requests/utils.py", Digest: pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
TopLevelPackages: []string{"requests"},
},
},
},
{
MetadataFixture: "test-fixtures/dist-info/METADATA",
RecordFixture: "test-fixtures/dist-info/RECORD",
TopLevelFixture: "test-fixtures/dist-info/top_level.txt",
ExpectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
@ -128,6 +172,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
Files: []pkg.PythonFileRecord{
{Path: "../../../bin/pygmentize", Digest: pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"},
{Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"},
@ -135,12 +180,15 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
{Path: "pygments/__pycache__/__init__.cpython-38.pyc"},
{Path: "pygments/util.py", Digest: pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"},
},
TopLevelPackages: []string{"pygments", "something_else"},
},
},
},
{
// in casses where the metadata file is available and the record is not we should still record there is a package
// in cases where the metadata file is available and the record is not we should still record there is a package
// additionally empty top_level.txt files should not result in an error
MetadataFixture: "test-fixtures/partial.dist-info/METADATA",
TopLevelFixture: "test-fixtures/partial.dist-info/top_level.txt",
ExpectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
@ -156,6 +204,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
},
},
},
@ -163,20 +212,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
for _, test := range tests {
t.Run(test.MetadataFixture, func(t *testing.T) {
metadata, err := os.Open(test.MetadataFixture)
if err != nil {
t.Fatalf("failed to open record: %+v", err)
}
var record io.Reader
if test.RecordFixture != "" {
record, err = os.Open(test.RecordFixture)
if err != nil {
t.Fatalf("failed to open record: %+v", err)
}
}
resolver := newTestResolver(record, metadata)
resolver := newTestResolver(test.MetadataFixture, test.RecordFixture, test.TopLevelFixture)
// note that the source is the metadata ref created by the resolver mock... attach the expected values
test.ExpectedPackage.Source = []file.Reference{*resolver.metadataRef}

View File

@ -4,8 +4,11 @@ import (
"bufio"
"fmt"
"io"
"path/filepath"
"strings"
"github.com/anchore/stereoscope/pkg/file"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/pkg"
@ -13,7 +16,7 @@ import (
// parseWheelOrEggMetadata takes a Python Egg or Wheel metadata file (which share the same format and values for our purposes),
// returning the Python package metadata parsed from it.
func parseWheelOrEggMetadata(reader io.Reader) (pkg.PythonPackageMetadata, error) {
func parseWheelOrEggMetadata(path file.Path, reader io.Reader) (pkg.PythonPackageMetadata, error) {
fields := make(map[string]string)
var key string
@ -68,5 +71,10 @@ func parseWheelOrEggMetadata(reader io.Reader) (pkg.PythonPackageMetadata, error
return pkg.PythonPackageMetadata{}, fmt.Errorf("unable to parse APK metadata: %w", err)
}
// add additional metadata not stored in the egg/wheel metadata file
sitePackagesRoot := filepath.Clean(filepath.Join(filepath.Dir(string(path)), ".."))
metadata.SitePackagesRootPath = sitePackagesRoot
return metadata, nil
}

View File

@ -4,6 +4,8 @@ import (
"os"
"testing"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
)
@ -22,6 +24,7 @@ func TestParseWheelEggMetadata(t *testing.T) {
Platform: "UNKNOWN",
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures",
},
},
{
@ -33,6 +36,7 @@ func TestParseWheelEggMetadata(t *testing.T) {
Platform: "any",
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
},
},
}
@ -44,7 +48,7 @@ func TestParseWheelEggMetadata(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(fixture)
actual, err := parseWheelOrEggMetadata(file.Path(test.Fixture), fixture)
if err != nil {
t.Fatalf("failed to parse: %+v", err)
}

View File

@ -0,0 +1,2 @@
pygments
something_else

View File

@ -0,0 +1 @@
requests

View File

@ -5,6 +5,7 @@ type Digest struct {
Value string `json:"value"`
}
// PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package
type PythonFileRecord struct {
Path string `json:"path"`
Digest Digest `json:"digest"`
@ -20,4 +21,6 @@ type PythonPackageMetadata struct {
AuthorEmail string `json:"authorEmail" mapstruct:"Authoremail"`
Platform string `json:"platform" mapstruct:"Platform"`
Files []PythonFileRecord `json:"files,omitempty"`
SitePackagesRootPath string `json:"sitePackagesRootPath"`
TopLevelPackages []string `json:"topLevelPackages,omitempty"`
}

View File

@ -5,6 +5,7 @@ import (
"io/ioutil"
"os"
"path"
"path/filepath"
"github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
@ -18,7 +19,7 @@ type DirectoryResolver struct {
// Stringer to represent a directory path data source
func (s DirectoryResolver) String() string {
return fmt.Sprintf("dir://%s", s.Path)
return fmt.Sprintf("dir:%s", s.Path)
}
// FilesByPath returns all file.References that match the given paths from the directory.
@ -26,15 +27,19 @@ func (s DirectoryResolver) FilesByPath(userPaths ...file.Path) ([]file.Reference
var references = make([]file.Reference, 0)
for _, userPath := range userPaths {
resolvedPath := path.Join(s.Path, string(userPath))
_, err := os.Stat(resolvedPath)
userStrPath := string(userPath)
if filepath.IsAbs(userStrPath) {
// an absolute path (one relative to root) should be prefixed with the resolver's directory path, otherwise it should be left as is
userStrPath = path.Join(s.Path, userStrPath)
}
_, err := os.Stat(userStrPath)
if os.IsNotExist(err) {
continue
} else if err != nil {
log.Errorf("path (%s) is not valid: %v", resolvedPath, err)
log.Errorf("path (%s) is not valid: %v", userStrPath, err)
}
filePath := file.Path(resolvedPath)
references = append(references, file.NewFileReference(filePath))
references = append(references, file.NewFileReference(file.Path(userStrPath)))
}
return references, nil

View File

@ -1,7 +1,6 @@
package resolvers
import (
"path"
"testing"
"github.com/anchore/stereoscope/pkg/file"
@ -10,24 +9,49 @@ import (
func TestDirectoryResolver_FilesByPath(t *testing.T) {
cases := []struct {
name string
root string
input string
expected string
refCount int
}{
{
name: "finds a file",
input: "image-symlinks/file-1.txt",
name: "finds a file (relative)",
root: "./test-fixtures/",
input: "test-fixtures/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
refCount: 1,
},
{
name: "managed non-existing files",
input: "image-symlinks/bogus.txt",
name: "finds a file with relative indirection",
root: "./test-fixtures/../test-fixtures",
input: "test-fixtures/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
refCount: 1,
},
{
// note: this is asserting the old behavior is not supported
name: "relative lookup with wrong path fails",
root: "./test-fixtures/",
input: "image-symlinks/file-1.txt",
refCount: 0,
},
{
name: "managed non-existing files (relative)",
root: "./test-fixtures/",
input: "test-fixtures/image-symlinks/bogus.txt",
refCount: 0,
},
{
name: "finds a file (absolute)",
root: "./test-fixtures/",
input: "/image-symlinks/file-1.txt",
expected: "test-fixtures/image-symlinks/file-1.txt",
refCount: 1,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
resolver := DirectoryResolver{"test-fixtures"}
expected := path.Join("test-fixtures", c.input)
resolver := DirectoryResolver{c.root}
refs, err := resolver.FilesByPath(file.Path(c.input))
if err != nil {
t.Fatalf("could not use resolver: %+v, %+v", err, refs)
@ -38,8 +62,8 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) {
}
for _, actual := range refs {
if actual.Path != file.Path(expected) {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.input)
if actual.Path != file.Path(c.expected) {
t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.expected)
}
}
})
@ -54,17 +78,17 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) {
}{
{
name: "finds multiple files",
input: []file.Path{file.Path("image-symlinks/file-1.txt"), file.Path("image-symlinks/file-2.txt")},
input: []file.Path{file.Path("test-fixtures/image-symlinks/file-1.txt"), file.Path("test-fixtures/image-symlinks/file-2.txt")},
refCount: 2,
},
{
name: "skips non-existing files",
input: []file.Path{file.Path("image-symlinks/bogus.txt"), file.Path("image-symlinks/file-1.txt")},
input: []file.Path{file.Path("test-fixtures/image-symlinks/bogus.txt"), file.Path("test-fixtures/image-symlinks/file-1.txt")},
refCount: 1,
},
{
name: "does not return anything for non-existing directories",
input: []file.Path{file.Path("non-existing/bogus.txt"), file.Path("non-existing/file-1.txt")},
input: []file.Path{file.Path("test-fixtures/non-existing/bogus.txt"), file.Path("test-fixtures/non-existing/file-1.txt")},
refCount: 0,
},
}
@ -93,17 +117,17 @@ func TestDirectoryResolver_MultipleFileContentsByRef(t *testing.T) {
}{
{
name: "gets multiple file contents",
input: []file.Path{file.Path("image-symlinks/file-1.txt"), file.Path("image-symlinks/file-2.txt")},
input: []file.Path{file.Path("test-fixtures/image-symlinks/file-1.txt"), file.Path("test-fixtures/image-symlinks/file-2.txt")},
refCount: 2,
},
{
name: "skips non-existing files",
input: []file.Path{file.Path("image-symlinks/bogus.txt"), file.Path("image-symlinks/file-1.txt")},
input: []file.Path{file.Path("test-fixtures/image-symlinks/bogus.txt"), file.Path("test-fixtures/image-symlinks/file-1.txt")},
refCount: 1,
},
{
name: "does not return anything for non-existing directories",
input: []file.Path{file.Path("non-existing/bogus.txt"), file.Path("non-existing/file-1.txt")},
input: []file.Path{file.Path("test-fixtures/non-existing/bogus.txt"), file.Path("test-fixtures/non-existing/file-1.txt")},
refCount: 0,
},
}

View File

@ -61,13 +61,13 @@ func TestDirectoryScope(t *testing.T) {
{
desc: "path detected",
input: "test-fixtures",
inputPaths: []file.Path{file.Path("path-detected")},
inputPaths: []file.Path{file.Path("test-fixtures/path-detected")},
expRefs: 1,
},
{
desc: "no files-by-path detected",
input: "test-fixtures",
inputPaths: []file.Path{file.Path("no-path-detected")},
inputPaths: []file.Path{file.Path("test-fixtures/no-path-detected")},
expRefs: 0,
},
}
@ -105,13 +105,13 @@ func TestMultipleFileContentsByRefContents(t *testing.T) {
{
input: "test-fixtures/path-detected",
desc: "empty file",
path: "empty",
path: "test-fixtures/path-detected/empty",
expected: "",
},
{
input: "test-fixtures/path-detected",
desc: "file has contents",
path: ".vimrc",
path: "test-fixtures/path-detected/.vimrc",
expected: "\" A .vimrc file\n",
},
}
@ -127,7 +127,7 @@ func TestMultipleFileContentsByRefContents(t *testing.T) {
}
if len(refs) != 1 {
t.Errorf("expected a single ref to be generated but got: %d", len(refs))
t.Fatalf("expected a single ref to be generated but got: %d", len(refs))
}
ref := refs[0]

View File

@ -0,0 +1 @@
top-level-pkg

View File

@ -0,0 +1 @@
top-level-pkg