diff --git a/Makefile b/Makefile index 04b3913c4..8871c8965 100644 --- a/Makefile +++ b/Makefile @@ -122,7 +122,7 @@ validate-cyclonedx-schema: .PHONY: unit unit: fixtures ## Run unit tests (with coverage) $(call title,Running unit tests) - go test -coverprofile $(COVER_REPORT) ./... + go test -coverprofile $(COVER_REPORT) $(shell go list ./... | grep -v anchore/syft/test) @go tool cover -func $(COVER_REPORT) | grep total | awk '{print substr($$3, 1, length($$3)-1)}' > $(COVER_TOTAL) @echo "Coverage: $$(cat $(COVER_TOTAL))" @if [ $$(echo "$$(cat $(COVER_TOTAL)) >= $(COVERAGE_THRESHOLD)" | bc -l) -ne 1 ]; then echo "$(RED)$(BOLD)Failed coverage quality gate (> $(COVERAGE_THRESHOLD)%)$(RESET)" && false; fi diff --git a/schema/json/schema.json b/schema/json/schema.json index dde279232..440b271fc 100644 --- a/schema/json/schema.json +++ b/schema/json/schema.json @@ -43,6 +43,9 @@ "author": { "type": "string" }, + "authorEmail": { + "type": "string" + }, "description": { "type": "string" }, @@ -65,6 +68,21 @@ "checksum": { "type": "string" }, + "digest": { + "properties": { + "algorithm": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "required": [ + "algorithm", + "value" + ], + "type": "object" + }, "ownerGid": { "type": "string" }, @@ -76,14 +94,13 @@ }, "permissions": { "type": "string" + }, + "size": { + "type": "string" } }, "required": [ - "checksum", - "ownerGid", - "ownerUid", - "path", - "permissions" + "path" ], "type": "object" } @@ -403,6 +420,9 @@ ], "type": "object" }, + "metadataType": { + "type": "string" + }, "sources": { "type": "null" }, @@ -419,6 +439,7 @@ "licenses", "manifest", "metadata", + "metadataType", "sources", "type", "version" @@ -427,6 +448,9 @@ } ] }, + "platform": { + "type": "string" + }, "pomProperties": { "properties": { "Path": { @@ -467,6 +491,9 @@ "release": { "type": "string" }, + "sitePackagesRootPath": { + "type": "string" + }, "size": { "type": "integer" }, @@ -476,6 +503,12 @@ "sourceRpm": { "type": "string" }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array" + }, "url": { "type": "string" }, diff --git a/syft/cataloger/apkdb/parse_apk_db.go b/syft/cataloger/apkdb/parse_apk_db.go index 7cc9ec16e..3ce1f921d 100644 --- a/syft/cataloger/apkdb/parse_apk_db.go +++ b/syft/cataloger/apkdb/parse_apk_db.go @@ -49,11 +49,12 @@ func parseApkDB(_ string, reader io.Reader) ([]pkg.Package, error) { } if metadata != nil { packages = append(packages, pkg.Package{ - Name: metadata.Package, - Version: metadata.Version, - Licenses: strings.Split(metadata.License, " "), - Type: pkg.ApkPkg, - Metadata: *metadata, + Name: metadata.Package, + Version: metadata.Version, + Licenses: strings.Split(metadata.License, " "), + Type: pkg.ApkPkg, + MetadataType: pkg.ApkMetadataType, + Metadata: *metadata, }) } } diff --git a/syft/cataloger/apkdb/parse_apk_db_test.go b/syft/cataloger/apkdb/parse_apk_db_test.go index 3b22cc11d..c8846ac24 100644 --- a/syft/cataloger/apkdb/parse_apk_db_test.go +++ b/syft/cataloger/apkdb/parse_apk_db_test.go @@ -160,10 +160,11 @@ func TestMultiplePackages(t *testing.T) { fixture: "test-fixtures/multiple", expected: []pkg.Package{ { - Name: "libc-utils", - Version: "0.7.2-r0", - Licenses: []string{"BSD"}, - Type: pkg.ApkPkg, + Name: "libc-utils", + Version: "0.7.2-r0", + Licenses: []string{"BSD"}, + Type: pkg.ApkPkg, + MetadataType: pkg.ApkMetadataType, Metadata: pkg.ApkMetadata{ Package: "libc-utils", OriginPackage: "libc-dev", @@ -182,10 +183,11 @@ func TestMultiplePackages(t *testing.T) { }, }, { - Name: "musl-utils", - Version: "1.1.24-r2", - Licenses: []string{"MIT", "BSD", "GPL2+"}, - Type: pkg.ApkPkg, + Name: "musl-utils", + Version: "1.1.24-r2", + Licenses: []string{"MIT", "BSD", "GPL2+"}, + Type: pkg.ApkPkg, + MetadataType: pkg.ApkMetadataType, Metadata: pkg.ApkMetadata{ Package: "musl-utils", OriginPackage: "musl", diff --git a/syft/cataloger/cataloger.go b/syft/cataloger/cataloger.go index e10de959b..0ee9f5be2 100644 --- a/syft/cataloger/cataloger.go +++ b/syft/cataloger/cataloger.go @@ -32,7 +32,7 @@ type Cataloger interface { func ImageCatalogers() []Cataloger { return []Cataloger{ ruby.NewGemSpecCataloger(), - python.NewPythonCataloger(), // TODO: split and replace me + python.NewPythonPackageCataloger(), javascript.NewJavascriptPackageCataloger(), deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), @@ -46,7 +46,8 @@ func ImageCatalogers() []Cataloger { func DirectoryCatalogers() []Cataloger { return []Cataloger{ ruby.NewGemFileLockCataloger(), - python.NewPythonCataloger(), // TODO: split and replace me + python.NewPythonIndexCataloger(), + python.NewPythonPackageCataloger(), javascript.NewJavascriptLockCataloger(), deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), diff --git a/syft/cataloger/common/generic_cataloger_test.go b/syft/cataloger/common/generic_cataloger_test.go index 39298ec71..6083d390a 100644 --- a/syft/cataloger/common/generic_cataloger_test.go +++ b/syft/cataloger/common/generic_cataloger_test.go @@ -10,25 +10,25 @@ import ( "github.com/anchore/syft/syft/pkg" ) -type testResolver struct { +type testResolverMock struct { contents map[file.Reference]string } -func newTestResolver() *testResolver { - return &testResolver{ +func newTestResolver() *testResolverMock { + return &testResolverMock{ contents: make(map[file.Reference]string), } } -func (r *testResolver) FileContentsByRef(_ file.Reference) (string, error) { +func (r *testResolverMock) FileContentsByRef(_ file.Reference) (string, error) { return "", fmt.Errorf("not implemented") } -func (r *testResolver) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { +func (r *testResolverMock) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { return r.contents, nil } -func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) { +func (r *testResolverMock) FilesByPath(paths ...file.Path) ([]file.Reference, error) { results := make([]file.Reference, len(paths)) for idx, p := range paths { @@ -39,13 +39,17 @@ func (r *testResolver) FilesByPath(paths ...file.Path) ([]file.Reference, error) return results, nil } -func (r *testResolver) FilesByGlob(_ ...string) ([]file.Reference, error) { +func (r *testResolverMock) FilesByGlob(_ ...string) ([]file.Reference, error) { path := "/a-path.txt" ref := file.NewFileReference(file.Path(path)) r.contents[ref] = fmt.Sprintf("%s file contents!", path) return []file.Reference{ref}, nil } +func (r *testResolverMock) RelativeFileByPath(_ file.Reference, _ string) (*file.Reference, error) { + return nil, fmt.Errorf("not implemented") +} + func parser(_ string, reader io.Reader) ([]pkg.Package, error) { contents, err := ioutil.ReadAll(reader) if err != nil { diff --git a/syft/cataloger/deb/parse_dpkg_status.go b/syft/cataloger/deb/parse_dpkg_status.go index 20c3b205d..cd5b3c132 100644 --- a/syft/cataloger/deb/parse_dpkg_status.go +++ b/syft/cataloger/deb/parse_dpkg_status.go @@ -30,10 +30,11 @@ func parseDpkgStatus(_ string, reader io.Reader) ([]pkg.Package, error) { return nil, err } packages = append(packages, pkg.Package{ - Name: entry.Package, - Version: entry.Version, - Type: pkg.DebPkg, - Metadata: entry, + Name: entry.Package, + Version: entry.Version, + Type: pkg.DebPkg, + MetadataType: pkg.DpkgMetadataType, + Metadata: entry, }) } diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index 8681ffe57..b64c43adf 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -142,10 +142,11 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) { } return &pkg.Package{ - Name: selectName(manifest, j.fileInfo), - Version: selectVersion(manifest, j.fileInfo), - Language: pkg.Java, - Type: pkg.JavaPkg, + Name: selectName(manifest, j.fileInfo), + Version: selectVersion(manifest, j.fileInfo), + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ Manifest: manifest, }, @@ -177,10 +178,11 @@ func (j *archiveParser) discoverPkgsFromPomProperties(parentPkg *pkg.Package) ([ // discovered props = new package p := pkg.Package{ - Name: propsObj.ArtifactID, - Version: propsObj.Version, - Language: pkg.Java, - Type: pkg.JavaPkg, + Name: propsObj.ArtifactID, + Version: propsObj.Version, + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ PomProperties: propsObj, Parent: parentPkg, diff --git a/syft/cataloger/java/archive_parser_test.go b/syft/cataloger/java/archive_parser_test.go index df290195b..bd402af77 100644 --- a/syft/cataloger/java/archive_parser_test.go +++ b/syft/cataloger/java/archive_parser_test.go @@ -137,10 +137,11 @@ func TestParseJar(t *testing.T) { }, expected: map[string]pkg.Package{ "example-jenkins-plugin": { - Name: "example-jenkins-plugin", - Version: "1.0-SNAPSHOT", - Language: pkg.Java, - Type: pkg.JenkinsPluginPkg, + Name: "example-jenkins-plugin", + Version: "1.0-SNAPSHOT", + Language: pkg.Java, + Type: pkg.JenkinsPluginPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ Manifest: &pkg.JavaManifest{ ManifestVersion: "1.0", @@ -181,10 +182,11 @@ func TestParseJar(t *testing.T) { fixture: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar", expected: map[string]pkg.Package{ "example-java-app-gradle": { - Name: "example-java-app-gradle", - Version: "0.1.0", - Language: pkg.Java, - Type: pkg.JavaPkg, + Name: "example-java-app-gradle", + Version: "0.1.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ Manifest: &pkg.JavaManifest{ ManifestVersion: "1.0", @@ -200,10 +202,11 @@ func TestParseJar(t *testing.T) { }, expected: map[string]pkg.Package{ "example-java-app-maven": { - Name: "example-java-app-maven", - Version: "0.1.0", - Language: pkg.Java, - Type: pkg.JavaPkg, + Name: "example-java-app-maven", + Version: "0.1.0", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ Manifest: &pkg.JavaManifest{ ManifestVersion: "1.0", @@ -224,10 +227,11 @@ func TestParseJar(t *testing.T) { }, }, "joda-time": { - Name: "joda-time", - Version: "2.9.2", - Language: pkg.Java, - Type: pkg.JavaPkg, + Name: "joda-time", + Version: "2.9.2", + Language: pkg.Java, + Type: pkg.JavaPkg, + MetadataType: pkg.JavaMetadataType, Metadata: pkg.JavaMetadata{ PomProperties: &pkg.PomProperties{ Path: "META-INF/maven/joda-time/joda-time/pom.properties", diff --git a/syft/cataloger/javascript/parse_package_json.go b/syft/cataloger/javascript/parse_package_json.go index 83642e451..bcdc68801 100644 --- a/syft/cataloger/javascript/parse_package_json.go +++ b/syft/cataloger/javascript/parse_package_json.go @@ -38,12 +38,13 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) { } packages = append(packages, pkg.Package{ - Name: p.Name, - Version: p.Version, - Licenses: []string{p.License}, - Language: pkg.JavaScript, - Type: pkg.NpmPkg, - Metadata: pkg.NpmMetadata{ + Name: p.Name, + Version: p.Version, + Licenses: []string{p.License}, + Language: pkg.JavaScript, + Type: pkg.NpmPkg, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ Author: p.Author, Homepage: p.Homepage, }, diff --git a/syft/cataloger/javascript/parse_package_json_test.go b/syft/cataloger/javascript/parse_package_json_test.go index f8e7b92fb..bfe46f4a0 100644 --- a/syft/cataloger/javascript/parse_package_json_test.go +++ b/syft/cataloger/javascript/parse_package_json_test.go @@ -10,12 +10,13 @@ import ( func TestParsePackageJSON(t *testing.T) { expected := pkg.Package{ - Name: "npm", - Version: "6.14.6", - Type: pkg.NpmPkg, - Licenses: []string{"Artistic-2.0"}, - Language: pkg.JavaScript, - Metadata: pkg.NpmMetadata{ + Name: "npm", + Version: "6.14.6", + Type: pkg.NpmPkg, + Licenses: []string{"Artistic-2.0"}, + Language: pkg.JavaScript, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ Author: "Isaac Z. Schlueter (http://blog.izs.me)", Homepage: "https://docs.npmjs.com/", }, diff --git a/syft/cataloger/python/cataloger.go b/syft/cataloger/python/cataloger.go deleted file mode 100644 index b4e9f1329..000000000 --- a/syft/cataloger/python/cataloger.go +++ /dev/null @@ -1,21 +0,0 @@ -/* -Package python provides a concrete Cataloger implementation for Python ecosystem files (egg, wheel, requirements.txt). -*/ -package python - -import ( - "github.com/anchore/syft/syft/cataloger/common" -) - -// NewPythonCataloger returns a new Python cataloger object. -func NewPythonCataloger() *common.GenericCataloger { - globParsers := map[string]common.ParserFn{ - "**/*egg-info/PKG-INFO": parseEggMetadata, - "**/*dist-info/METADATA": parseWheelMetadata, - "**/*requirements*.txt": parseRequirementsTxt, - "**/poetry.lock": parsePoetryLock, - "**/setup.py": parseSetup, - } - - return common.NewGenericCataloger(nil, globParsers, "python-cataloger") -} diff --git a/syft/cataloger/python/index_cataloger.go b/syft/cataloger/python/index_cataloger.go new file mode 100644 index 000000000..620892b22 --- /dev/null +++ b/syft/cataloger/python/index_cataloger.go @@ -0,0 +1,19 @@ +/* +Package python provides a concrete Cataloger implementation for Python ecosystem files (egg, wheel, requirements.txt). +*/ +package python + +import ( + "github.com/anchore/syft/syft/cataloger/common" +) + +// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. +func NewPythonIndexCataloger() *common.GenericCataloger { + globParsers := map[string]common.ParserFn{ + "**/*requirements*.txt": parseRequirementsTxt, + "**/poetry.lock": parsePoetryLock, + "**/setup.py": parseSetup, + } + + return common.NewGenericCataloger(nil, globParsers, "python-index-cataloger") +} diff --git a/syft/cataloger/python/package_cataloger.go b/syft/cataloger/python/package_cataloger.go new file mode 100644 index 000000000..eaf76abe1 --- /dev/null +++ b/syft/cataloger/python/package_cataloger.go @@ -0,0 +1,179 @@ +package python + +import ( + "bufio" + "fmt" + "path/filepath" + "strings" + + "github.com/anchore/stereoscope/pkg/file" + + "github.com/anchore/syft/syft/pkg" + + "github.com/anchore/syft/syft/scope" +) + +const ( + eggMetadataGlob = "**/*egg-info/PKG-INFO" + wheelMetadataGlob = "**/*dist-info/METADATA" +) + +type PackageCataloger struct{} + +// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. +func NewPythonPackageCataloger() *PackageCataloger { + return &PackageCataloger{} +} + +// Name returns a string that uniquely describes a cataloger +func (c *PackageCataloger) Name() string { + return "python-package-cataloger" +} + +// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations. +func (c *PackageCataloger) Catalog(resolver scope.Resolver) ([]pkg.Package, error) { + // nolint:prealloc + var fileMatches []file.Reference + + for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} { + matches, err := resolver.FilesByGlob(glob) + if err != nil { + return nil, fmt.Errorf("failed to find files by glob: %s", glob) + } + fileMatches = append(fileMatches, matches...) + } + + var pkgs []pkg.Package + for _, ref := range fileMatches { + p, err := c.catalogEggOrWheel(resolver, ref) + if err != nil { + return nil, fmt.Errorf("unable to catalog python package=%+v: %w", ref.Path, err) + } + if p != nil { + pkgs = append(pkgs, *p) + } + } + return pkgs, nil +} + +// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. +func (c *PackageCataloger) catalogEggOrWheel(resolver scope.Resolver, metadataRef file.Reference) (*pkg.Package, error) { + metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataRef) + if err != nil { + return nil, err + } + + var licenses []string + if metadata.License != "" { + licenses = []string{metadata.License} + } + + return &pkg.Package{ + Name: metadata.Name, + Version: metadata.Version, + FoundBy: c.Name(), + Source: sources, + Licenses: licenses, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonPackageMetadataType, + Metadata: *metadata, + }, nil +} + +// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. +func (c *PackageCataloger) fetchRecordFiles(resolver scope.Resolver, metadataRef file.Reference) (files []pkg.PythonFileRecord, sources []file.Reference, err error) { + // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory + // or for an image... for an image the METADATA file may be present within multiple layers, so it is important + // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). + + // lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) + recordPath := filepath.Join(filepath.Dir(string(metadataRef.Path)), "RECORD") + recordRef, err := resolver.RelativeFileByPath(metadataRef, recordPath) + if err != nil { + return nil, nil, err + } + + if recordRef != nil { + sources = append(sources, *recordRef) + + recordContents, err := resolver.FileContentsByRef(*recordRef) + if err != nil { + return nil, nil, err + } + + // parse the record contents + records, err := parseWheelOrEggRecord(strings.NewReader(recordContents)) + if err != nil { + return nil, nil, err + } + + files = append(files, records...) + } + return files, sources, nil +} + +// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. +func (c *PackageCataloger) fetchTopLevelPackages(resolver scope.Resolver, metadataRef file.Reference) (pkgs []string, sources []file.Reference, err error) { + // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages + parentDir := filepath.Dir(string(metadataRef.Path)) + topLevelPath := filepath.Join(parentDir, "top_level.txt") + topLevelRef, err := resolver.RelativeFileByPath(metadataRef, topLevelPath) + if err != nil { + return nil, nil, err + } + if topLevelRef == nil { + return nil, nil, fmt.Errorf("missing python package top_level.txt (package=%q)", string(metadataRef.Path)) + } + + sources = append(sources, *topLevelRef) + + topLevelContents, err := resolver.FileContentsByRef(*topLevelRef) + if err != nil { + return nil, nil, err + } + + scanner := bufio.NewScanner(strings.NewReader(topLevelContents)) + for scanner.Scan() { + pkgs = append(pkgs, scanner.Text()) + } + + if err := scanner.Err(); err != nil { + return nil, nil, fmt.Errorf("could not read python package top_level.txt: %w", err) + } + + return pkgs, sources, nil +} + +// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. +func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver scope.Resolver, metadataRef file.Reference) (*pkg.PythonPackageMetadata, []file.Reference, error) { + var sources = []file.Reference{metadataRef} + + metadataContents, err := resolver.FileContentsByRef(metadataRef) + if err != nil { + return nil, nil, err + } + + metadata, err := parseWheelOrEggMetadata(metadataRef.Path, strings.NewReader(metadataContents)) + if err != nil { + return nil, nil, err + } + + // attach any python files found for the given wheel/egg installation + r, s, err := c.fetchRecordFiles(resolver, metadataRef) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.Files = r + + // attach any top-level package names found for the given wheel/egg installation + p, s, err := c.fetchTopLevelPackages(resolver, metadataRef) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.TopLevelPackages = p + + return &metadata, sources, nil +} diff --git a/syft/cataloger/python/package_cataloger_test.go b/syft/cataloger/python/package_cataloger_test.go new file mode 100644 index 000000000..a3f10091e --- /dev/null +++ b/syft/cataloger/python/package_cataloger_test.go @@ -0,0 +1,241 @@ +package python + +import ( + "fmt" + "io" + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/anchore/stereoscope/pkg/file" + + "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" +) + +type pythonTestResolverMock struct { + metadataReader io.Reader + recordReader io.Reader + topLevelReader io.Reader + metadataRef *file.Reference + recordRef *file.Reference + topLevelRef *file.Reference + contents map[file.Reference]string +} + +func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMock { + metadataReader, err := os.Open(metaPath) + if err != nil { + panic(fmt.Errorf("failed to open metadata: %+v", err)) + } + + var recordReader io.Reader + if recordPath != "" { + recordReader, err = os.Open(recordPath) + if err != nil { + panic(fmt.Errorf("failed to open record: %+v", err)) + } + } + + var topLevelReader io.Reader + if topPath != "" { + topLevelReader, err = os.Open(topPath) + if err != nil { + panic(fmt.Errorf("failed to open top level: %+v", err)) + } + } + + var recordRef *file.Reference + if recordReader != nil { + ref := file.NewFileReference("test-fixtures/dist-info/RECORD") + recordRef = &ref + } + var topLevelRef *file.Reference + if topLevelReader != nil { + ref := file.NewFileReference("test-fixtures/dist-info/top_level.txt") + topLevelRef = &ref + } + metadataRef := file.NewFileReference("test-fixtures/dist-info/METADATA") + return &pythonTestResolverMock{ + recordReader: recordReader, + metadataReader: metadataReader, + topLevelReader: topLevelReader, + metadataRef: &metadataRef, + recordRef: recordRef, + topLevelRef: topLevelRef, + contents: make(map[file.Reference]string), + } +} + +func (r *pythonTestResolverMock) FileContentsByRef(ref file.Reference) (string, error) { + switch ref.Path { + case r.topLevelRef.Path: + b, err := ioutil.ReadAll(r.topLevelReader) + if err != nil { + return "", err + } + return string(b), nil + case r.metadataRef.Path: + b, err := ioutil.ReadAll(r.metadataReader) + if err != nil { + return "", err + } + return string(b), nil + case r.recordRef.Path: + b, err := ioutil.ReadAll(r.recordReader) + if err != nil { + return "", err + } + return string(b), nil + } + return "", fmt.Errorf("invalid value given") +} + +func (r *pythonTestResolverMock) MultipleFileContentsByRef(_ ...file.Reference) (map[file.Reference]string, error) { + return nil, fmt.Errorf("not implemented") +} + +func (r *pythonTestResolverMock) FilesByPath(_ ...file.Path) ([]file.Reference, error) { + return nil, fmt.Errorf("not implemented") +} + +func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]file.Reference, error) { + return nil, fmt.Errorf("not implemented") +} +func (r *pythonTestResolverMock) RelativeFileByPath(_ file.Reference, path string) (*file.Reference, error) { + switch { + case strings.Contains(path, "RECORD"): + return r.recordRef, nil + case strings.Contains(path, "top_level.txt"): + return r.topLevelRef, nil + default: + return nil, fmt.Errorf("invalid RelativeFileByPath value given: %q", path) + } +} + +func TestPythonPackageWheelCataloger(t *testing.T) { + tests := []struct { + MetadataFixture string + RecordFixture string + TopLevelFixture string + ExpectedPackage pkg.Package + }{ + { + MetadataFixture: "test-fixtures/egg-info/PKG-INFO", + RecordFixture: "test-fixtures/egg-info/RECORD", + TopLevelFixture: "test-fixtures/egg-info/top_level.txt", + ExpectedPackage: pkg.Package{ + Name: "requests", + Version: "2.22.0", + Type: pkg.PythonPkg, + Language: pkg.Python, + Licenses: []string{"Apache 2.0"}, + FoundBy: "python-package-cataloger", + MetadataType: pkg.PythonPackageMetadataType, + Metadata: pkg.PythonPackageMetadata{ + Name: "requests", + Version: "2.22.0", + License: "Apache 2.0", + Platform: "UNKNOWN", + Author: "Kenneth Reitz", + AuthorEmail: "me@kennethreitz.org", + SitePackagesRootPath: "test-fixtures", + Files: []pkg.PythonFileRecord{ + {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, + {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, + {Path: "requests/__pycache__/__version__.cpython-38.pyc"}, + {Path: "requests/__pycache__/utils.cpython-38.pyc"}, + {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, + {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, + }, + TopLevelPackages: []string{"requests"}, + }, + }, + }, + { + MetadataFixture: "test-fixtures/dist-info/METADATA", + RecordFixture: "test-fixtures/dist-info/RECORD", + TopLevelFixture: "test-fixtures/dist-info/top_level.txt", + ExpectedPackage: pkg.Package{ + Name: "Pygments", + Version: "2.6.1", + Type: pkg.PythonPkg, + Language: pkg.Python, + Licenses: []string{"BSD License"}, + FoundBy: "python-package-cataloger", + MetadataType: pkg.PythonPackageMetadataType, + Metadata: pkg.PythonPackageMetadata{ + Name: "Pygments", + Version: "2.6.1", + License: "BSD License", + Platform: "any", + Author: "Georg Brandl", + AuthorEmail: "georg@python.org", + SitePackagesRootPath: "test-fixtures", + Files: []pkg.PythonFileRecord{ + {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, + {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, + {Path: "Pygments-2.6.1.dist-info/RECORD"}, + {Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, + {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, + }, + TopLevelPackages: []string{"pygments", "something_else"}, + }, + }, + }, + { + // in cases where the metadata file is available and the record is not we should still record there is a package + // additionally empty top_level.txt files should not result in an error + MetadataFixture: "test-fixtures/partial.dist-info/METADATA", + TopLevelFixture: "test-fixtures/partial.dist-info/top_level.txt", + ExpectedPackage: pkg.Package{ + Name: "Pygments", + Version: "2.6.1", + Type: pkg.PythonPkg, + Language: pkg.Python, + Licenses: []string{"BSD License"}, + FoundBy: "python-package-cataloger", + MetadataType: pkg.PythonPackageMetadataType, + Metadata: pkg.PythonPackageMetadata{ + Name: "Pygments", + Version: "2.6.1", + License: "BSD License", + Platform: "any", + Author: "Georg Brandl", + AuthorEmail: "georg@python.org", + SitePackagesRootPath: "test-fixtures", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.MetadataFixture, func(t *testing.T) { + resolver := newTestResolver(test.MetadataFixture, test.RecordFixture, test.TopLevelFixture) + + // note that the source is the record ref created by the resolver mock... attach the expected values + test.ExpectedPackage.Source = []file.Reference{*resolver.metadataRef} + if resolver.recordRef != nil { + test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.recordRef) + } + + if resolver.topLevelRef != nil { + test.ExpectedPackage.Source = append(test.ExpectedPackage.Source, *resolver.topLevelRef) + } + // end patching expected values with runtime data... + + pyPkgCataloger := NewPythonPackageCataloger() + + actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef) + if err != nil { + t.Fatalf("failed to catalog python package: %+v", err) + } + + for _, d := range deep.Equal(actual, &test.ExpectedPackage) { + t.Errorf("diff: %+v", d) + } + }) + } + +} diff --git a/syft/cataloger/python/parse_poetry_lock_test.go b/syft/cataloger/python/parse_poetry_lock_test.go index e96c47836..80cc6b625 100644 --- a/syft/cataloger/python/parse_poetry_lock_test.go +++ b/syft/cataloger/python/parse_poetry_lock_test.go @@ -1,10 +1,11 @@ package python import ( - "github.com/anchore/syft/syft/pkg" - "github.com/go-test/deep" "os" "testing" + + "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" ) func TestParsePoetryLock(t *testing.T) { @@ -13,28 +14,28 @@ func TestParsePoetryLock(t *testing.T) { Name: "added-value", Version: "0.14.2", Language: pkg.Python, - Type: pkg.PoetryPkg, + Type: pkg.PythonPkg, Licenses: nil, }, { Name: "alabaster", Version: "0.7.12", Language: pkg.Python, - Type: pkg.PoetryPkg, + Type: pkg.PythonPkg, Licenses: nil, }, { Name: "appnope", Version: "0.1.0", Language: pkg.Python, - Type: pkg.PoetryPkg, + Type: pkg.PythonPkg, Licenses: nil, }, { Name: "asciitree", Version: "0.3.3", Language: pkg.Python, - Type: pkg.PoetryPkg, + Type: pkg.PythonPkg, Licenses: nil, }, } diff --git a/syft/cataloger/python/parse_requirements.go b/syft/cataloger/python/parse_requirements.go index 80c95eecc..27ec43cc8 100644 --- a/syft/cataloger/python/parse_requirements.go +++ b/syft/cataloger/python/parse_requirements.go @@ -47,7 +47,7 @@ func parseRequirementsTxt(_ string, reader io.Reader) ([]pkg.Package, error) { Name: name, Version: version, Language: pkg.Python, - Type: pkg.PythonRequirementsPkg, + Type: pkg.PythonPkg, }) default: continue diff --git a/syft/cataloger/python/parse_requirements_test.go b/syft/cataloger/python/parse_requirements_test.go index 96fd429af..8dd66092a 100644 --- a/syft/cataloger/python/parse_requirements_test.go +++ b/syft/cataloger/python/parse_requirements_test.go @@ -4,24 +4,45 @@ import ( "os" "testing" + "github.com/go-test/deep" + "github.com/anchore/syft/syft/pkg" ) +func assertPackagesEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { + t.Helper() + if len(actual) != len(expected) { + for _, a := range actual { + t.Log(" ", a) + } + t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expected)) + } + + for _, a := range actual { + expectedPkg, ok := expected[a.Name] + if !ok { + t.Errorf("unexpected package found: '%s'", a.Name) + } + + for _, d := range deep.Equal(a, expectedPkg) { + t.Errorf("diff: %+v", d) + } + } +} + func TestParseRequirementsTxt(t *testing.T) { expected := map[string]pkg.Package{ "foo": { Name: "foo", Version: "1.0.0", Language: pkg.Python, - Type: pkg.PythonRequirementsPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, "flask": { Name: "flask", Version: "4.0.0", Language: pkg.Python, - Type: pkg.PythonRequirementsPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, } fixture, err := os.Open("test-fixtures/requires/requirements.txt") @@ -34,6 +55,6 @@ func TestParseRequirementsTxt(t *testing.T) { t.Fatalf("failed to parse requirements: %+v", err) } - assertPkgsEqual(t, actual, expected) + assertPackagesEqual(t, actual, expected) } diff --git a/syft/cataloger/python/parse_setup.go b/syft/cataloger/python/parse_setup.go index 7851ccb81..337c436e7 100644 --- a/syft/cataloger/python/parse_setup.go +++ b/syft/cataloger/python/parse_setup.go @@ -41,7 +41,7 @@ func parseSetup(_ string, reader io.Reader) ([]pkg.Package, error) { Name: strings.Trim(name, "'\""), Version: strings.Trim(version, "'\""), Language: pkg.Python, - Type: pkg.PythonSetupPkg, + Type: pkg.PythonPkg, }) } } diff --git a/syft/cataloger/python/parse_setup_test.go b/syft/cataloger/python/parse_setup_test.go index 6ac911f8c..3b8fa8edc 100644 --- a/syft/cataloger/python/parse_setup_test.go +++ b/syft/cataloger/python/parse_setup_test.go @@ -13,36 +13,31 @@ func TestParseSetup(t *testing.T) { Name: "pathlib3", Version: "2.2.0", Language: pkg.Python, - Type: pkg.PythonSetupPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, "mypy": { Name: "mypy", Version: "v0.770", Language: pkg.Python, - Type: pkg.PythonSetupPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, "mypy1": { Name: "mypy1", Version: "v0.770", Language: pkg.Python, - Type: pkg.PythonSetupPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, "mypy2": { Name: "mypy2", Version: "v0.770", Language: pkg.Python, - Type: pkg.PythonSetupPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, "mypy3": { Name: "mypy3", Version: "v0.770", Language: pkg.Python, - Type: pkg.PythonSetupPkg, - Licenses: []string{}, + Type: pkg.PythonPkg, }, } fixture, err := os.Open("test-fixtures/setup/setup.py") @@ -55,6 +50,6 @@ func TestParseSetup(t *testing.T) { t.Fatalf("failed to parse requirements: %+v", err) } - assertPkgsEqual(t, actual, expected) + assertPackagesEqual(t, actual, expected) } diff --git a/syft/cataloger/python/parse_wheel_egg.go b/syft/cataloger/python/parse_wheel_egg.go deleted file mode 100644 index fc5100687..000000000 --- a/syft/cataloger/python/parse_wheel_egg.go +++ /dev/null @@ -1,100 +0,0 @@ -package python - -import ( - "bufio" - "fmt" - "io" - "strings" - - "github.com/anchore/syft/syft/cataloger/common" - "github.com/anchore/syft/syft/pkg" -) - -// integrity check -var _ common.ParserFn = parseWheelMetadata -var _ common.ParserFn = parseEggMetadata - -// parseWheelMetadata is a parser function for individual Python Wheel metadata file contents, returning all Python -// packages listed. -func parseWheelMetadata(_ string, reader io.Reader) ([]pkg.Package, error) { - packages, err := parseWheelOrEggMetadata(reader) - for idx := range packages { - packages[idx].Type = pkg.WheelPkg - } - return packages, err -} - -// parseEggMetadata is a parser function for individual Python Egg metadata file contents, returning all Python -// packages listed. -func parseEggMetadata(_ string, reader io.Reader) ([]pkg.Package, error) { - packages, err := parseWheelOrEggMetadata(reader) - for idx := range packages { - packages[idx].Type = pkg.EggPkg - } - return packages, err -} - -// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), -// returning all Python packages listed. -func parseWheelOrEggMetadata(reader io.Reader) ([]pkg.Package, error) { - fields := make(map[string]string) - var key string - - scanner := bufio.NewScanner(reader) - for scanner.Scan() { - line := scanner.Text() - - line = strings.TrimRight(line, "\n") - - // empty line indicates end of entry - if len(line) == 0 { - // if the entry has not started, keep parsing lines - if len(fields) == 0 { - continue - } - break - } - - switch { - case strings.HasPrefix(line, " "): - // a field-body continuation - if len(key) == 0 { - return nil, fmt.Errorf("no match for continuation: line: '%s'", line) - } - - val, ok := fields[key] - if !ok { - return nil, fmt.Errorf("no previous key exists, expecting: %s", key) - } - // concatenate onto previous value - val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) - fields[key] = val - default: - // parse a new key (note, duplicate keys are overridden) - if i := strings.Index(line, ":"); i > 0 { - key = strings.TrimSpace(line[0:i]) - val := strings.TrimSpace(line[i+1:]) - - fields[key] = val - } else { - return nil, fmt.Errorf("cannot parse field from line: '%s'", line) - } - } - } - - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("failed to parse python wheel/egg: %w", err) - } - - p := pkg.Package{ - Name: fields["Name"], - Version: fields["Version"], - Language: pkg.Python, - } - - if license, ok := fields["License"]; ok && license != "" { - p.Licenses = []string{license} - } - - return []pkg.Package{p}, nil -} diff --git a/syft/cataloger/python/parse_wheel_egg_metadata.go b/syft/cataloger/python/parse_wheel_egg_metadata.go new file mode 100644 index 000000000..dcb90a14c --- /dev/null +++ b/syft/cataloger/python/parse_wheel_egg_metadata.go @@ -0,0 +1,80 @@ +package python + +import ( + "bufio" + "fmt" + "io" + "path/filepath" + "strings" + + "github.com/anchore/stereoscope/pkg/file" + + "github.com/mitchellh/mapstructure" + + "github.com/anchore/syft/syft/pkg" +) + +// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), +// returning all Python packages listed. +func parseWheelOrEggMetadata(path file.Path, reader io.Reader) (pkg.PythonPackageMetadata, error) { + fields := make(map[string]string) + var key string + + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + line := scanner.Text() + line = strings.TrimRight(line, "\n") + + // empty line indicates end of entry + if len(line) == 0 { + // if the entry has not started, keep parsing lines + if len(fields) == 0 { + continue + } + break + } + + switch { + case strings.HasPrefix(line, " "): + // a field-body continuation + if len(key) == 0 { + return pkg.PythonPackageMetadata{}, fmt.Errorf("no match for continuation: line: '%s'", line) + } + + val, ok := fields[key] + if !ok { + return pkg.PythonPackageMetadata{}, fmt.Errorf("no previous key exists, expecting: %s", key) + } + // concatenate onto previous value + val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) + fields[key] = val + default: + // parse a new key (note, duplicate keys are overridden) + if i := strings.Index(line, ":"); i > 0 { + // mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field + key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "") + val := strings.TrimSpace(line[i+1:]) + + fields[key] = val + } else { + return pkg.PythonPackageMetadata{}, fmt.Errorf("cannot parse field from line: '%s'", line) + } + } + } + + if err := scanner.Err(); err != nil { + return pkg.PythonPackageMetadata{}, fmt.Errorf("failed to parse python wheel/egg: %w", err) + } + + var metadata pkg.PythonPackageMetadata + if err := mapstructure.Decode(fields, &metadata); err != nil { + return pkg.PythonPackageMetadata{}, fmt.Errorf("unable to parse APK metadata: %w", err) + } + + // add additional metadata not stored in the egg/wheel metadata file + + sitePackagesRoot := filepath.Clean(filepath.Join(filepath.Dir(string(path)), "..")) + metadata.SitePackagesRootPath = sitePackagesRoot + + return metadata, nil +} diff --git a/syft/cataloger/python/parse_wheel_egg_metadata_test.go b/syft/cataloger/python/parse_wheel_egg_metadata_test.go new file mode 100644 index 000000000..98896eef2 --- /dev/null +++ b/syft/cataloger/python/parse_wheel_egg_metadata_test.go @@ -0,0 +1,62 @@ +package python + +import ( + "os" + "testing" + + "github.com/anchore/stereoscope/pkg/file" + + "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" +) + +func TestParseWheelEggMetadata(t *testing.T) { + tests := []struct { + Fixture string + ExpectedMetadata pkg.PythonPackageMetadata + }{ + { + Fixture: "test-fixtures/egg-info/PKG-INFO", + ExpectedMetadata: pkg.PythonPackageMetadata{ + Name: "requests", + Version: "2.22.0", + License: "Apache 2.0", + Platform: "UNKNOWN", + Author: "Kenneth Reitz", + AuthorEmail: "me@kennethreitz.org", + SitePackagesRootPath: "test-fixtures", + }, + }, + { + Fixture: "test-fixtures/dist-info/METADATA", + ExpectedMetadata: pkg.PythonPackageMetadata{ + Name: "Pygments", + Version: "2.6.1", + License: "BSD License", + Platform: "any", + Author: "Georg Brandl", + AuthorEmail: "georg@python.org", + SitePackagesRootPath: "test-fixtures", + }, + }, + } + + for _, test := range tests { + t.Run(test.Fixture, func(t *testing.T) { + fixture, err := os.Open(test.Fixture) + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } + + actual, err := parseWheelOrEggMetadata(file.Path(test.Fixture), fixture) + if err != nil { + t.Fatalf("failed to parse: %+v", err) + } + + for _, d := range deep.Equal(actual, test.ExpectedMetadata) { + t.Errorf("diff: %+v", d) + } + }) + } + +} diff --git a/syft/cataloger/python/parse_wheel_egg_record.go b/syft/cataloger/python/parse_wheel_egg_record.go new file mode 100644 index 000000000..42faafa3f --- /dev/null +++ b/syft/cataloger/python/parse_wheel_egg_record.go @@ -0,0 +1,60 @@ +package python + +import ( + "encoding/csv" + "fmt" + "io" + "strings" + + "github.com/anchore/syft/syft/pkg" +) + +// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes), +// returning all Python packages listed. +func parseWheelOrEggRecord(reader io.Reader) ([]pkg.PythonFileRecord, error) { + var records []pkg.PythonFileRecord + r := csv.NewReader(reader) + + for { + recordList, err := r.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("unable to read python record file: %w", err) + } + + if len(recordList) != 3 { + return nil, fmt.Errorf("python record an unexpected length=%d: %q", len(recordList), recordList) + } + + var record pkg.PythonFileRecord + + for idx, item := range recordList { + switch idx { + case 0: + record.Path = item + case 1: + if item == "" { + continue + } + fields := strings.Split(item, "=") + + if len(fields) != 2 { + return nil, fmt.Errorf("unexpected python record digest: %q", item) + } + + record.Digest = &pkg.Digest{ + Algorithm: fields[0], + Value: fields[1], + } + case 2: + record.Size = item + } + } + + records = append(records, record) + } + + return records, nil +} diff --git a/syft/cataloger/python/parse_wheel_egg_record_test.go b/syft/cataloger/python/parse_wheel_egg_record_test.go new file mode 100644 index 000000000..d14868e0f --- /dev/null +++ b/syft/cataloger/python/parse_wheel_egg_record_test.go @@ -0,0 +1,57 @@ +package python + +import ( + "os" + "testing" + + "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" +) + +func TestParseWheelEggRecord(t *testing.T) { + tests := []struct { + Fixture string + ExpectedMetadata []pkg.PythonFileRecord + }{ + { + Fixture: "test-fixtures/egg-info/RECORD", + ExpectedMetadata: []pkg.PythonFileRecord{ + {Path: "requests-2.22.0.dist-info/INSTALLER", Digest: &pkg.Digest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}, Size: "4"}, + {Path: "requests/__init__.py", Digest: &pkg.Digest{"sha256", "PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA"}, Size: "3921"}, + {Path: "requests/__pycache__/__version__.cpython-38.pyc"}, + {Path: "requests/__pycache__/utils.cpython-38.pyc"}, + {Path: "requests/__version__.py", Digest: &pkg.Digest{"sha256", "Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc"}, Size: "436"}, + {Path: "requests/utils.py", Digest: &pkg.Digest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"}, + }, + }, + { + Fixture: "test-fixtures/dist-info/RECORD", + ExpectedMetadata: []pkg.PythonFileRecord{ + {Path: "../../../bin/pygmentize", Digest: &pkg.Digest{"sha256", "dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8"}, Size: "220"}, + {Path: "Pygments-2.6.1.dist-info/AUTHORS", Digest: &pkg.Digest{"sha256", "PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY"}, Size: "8449"}, + {Path: "Pygments-2.6.1.dist-info/RECORD"}, + {Path: "pygments/__pycache__/__init__.cpython-38.pyc"}, + {Path: "pygments/util.py", Digest: &pkg.Digest{"sha256", "586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA"}, Size: "10778"}, + }, + }, + } + + for _, test := range tests { + t.Run(test.Fixture, func(t *testing.T) { + fixture, err := os.Open(test.Fixture) + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } + + actual, err := parseWheelOrEggRecord(fixture) + if err != nil { + t.Fatalf("failed to parse: %+v", err) + } + + for _, d := range deep.Equal(actual, test.ExpectedMetadata) { + t.Errorf("diff: %+v", d) + } + }) + } + +} diff --git a/syft/cataloger/python/parse_wheel_egg_test.go b/syft/cataloger/python/parse_wheel_egg_test.go deleted file mode 100644 index 118573562..000000000 --- a/syft/cataloger/python/parse_wheel_egg_test.go +++ /dev/null @@ -1,93 +0,0 @@ -package python - -import ( - "os" - "testing" - - "github.com/anchore/syft/syft/pkg" -) - -func assertPkgsEqual(t *testing.T, actual []pkg.Package, expected map[string]pkg.Package) { - t.Helper() - if len(actual) != len(expected) { - for _, a := range actual { - t.Log(" ", a) - } - t.Fatalf("unexpected package count: %d!=%d", len(actual), len(expected)) - } - - for _, a := range actual { - expectedPkg, ok := expected[a.Name] - if !ok { - t.Errorf("unexpected package found: '%s'", a.Name) - } - - if expectedPkg.Version != a.Version { - t.Errorf("unexpected package version: '%s'", a.Version) - } - - if a.Language != expectedPkg.Language { - t.Errorf("bad language: '%+v'", a.Language) - } - - if a.Type != expectedPkg.Type { - t.Errorf("bad package type: %+v", a.Type) - } - - if len(a.Licenses) < len(expectedPkg.Licenses) { - t.Errorf("bad package licenses count: '%+v'", a.Licenses) - } - if len(a.Licenses) > 0 { - if a.Licenses[0] != expectedPkg.Licenses[0] { - t.Errorf("bad package licenses: '%+v'", a.Licenses) - } - } - - } -} - -func TestParseEggMetadata(t *testing.T) { - expected := map[string]pkg.Package{ - "requests": { - Name: "requests", - Version: "2.22.0", - Language: pkg.Python, - Type: pkg.EggPkg, - Licenses: []string{"Apache 2.0"}, - }, - } - fixture, err := os.Open("test-fixtures/egg-info/PKG-INFO") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } - - actual, err := parseEggMetadata(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse egg-info: %+v", err) - } - - assertPkgsEqual(t, actual, expected) -} - -func TestParseWheelMetadata(t *testing.T) { - expected := map[string]pkg.Package{ - "Pygments": { - Name: "Pygments", - Version: "2.6.1", - Language: pkg.Python, - Type: pkg.WheelPkg, - Licenses: []string{"BSD License"}, - }, - } - fixture, err := os.Open("test-fixtures/dist-info/METADATA") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } - - actual, err := parseWheelMetadata(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse dist-info: %+v", err) - } - - assertPkgsEqual(t, actual, expected) -} diff --git a/syft/cataloger/python/poetry_metadata_package.go b/syft/cataloger/python/poetry_metadata_package.go index 20a42ae05..5e9e454b1 100644 --- a/syft/cataloger/python/poetry_metadata_package.go +++ b/syft/cataloger/python/poetry_metadata_package.go @@ -16,6 +16,6 @@ func (p PoetryMetadataPackage) Pkg() pkg.Package { Name: p.Name, Version: p.Version, Language: pkg.Python, - Type: pkg.PoetryPkg, + Type: pkg.PythonPkg, } } diff --git a/syft/cataloger/python/test-fixtures/dist-info/RECORD b/syft/cataloger/python/test-fixtures/dist-info/RECORD new file mode 100644 index 000000000..af233f390 --- /dev/null +++ b/syft/cataloger/python/test-fixtures/dist-info/RECORD @@ -0,0 +1,5 @@ +../../../bin/pygmentize,sha256=dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8,220 +Pygments-2.6.1.dist-info/AUTHORS,sha256=PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY,8449 +Pygments-2.6.1.dist-info/RECORD,, +pygments/__pycache__/__init__.cpython-38.pyc,, +pygments/util.py,sha256=586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA,10778 \ No newline at end of file diff --git a/syft/cataloger/python/test-fixtures/dist-info/top_level.txt b/syft/cataloger/python/test-fixtures/dist-info/top_level.txt new file mode 100644 index 000000000..2c30fc0c4 --- /dev/null +++ b/syft/cataloger/python/test-fixtures/dist-info/top_level.txt @@ -0,0 +1,2 @@ +pygments +something_else \ No newline at end of file diff --git a/syft/cataloger/python/test-fixtures/egg-info/RECORD b/syft/cataloger/python/test-fixtures/egg-info/RECORD new file mode 100644 index 000000000..a2c2f97c3 --- /dev/null +++ b/syft/cataloger/python/test-fixtures/egg-info/RECORD @@ -0,0 +1,6 @@ +requests-2.22.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +requests/__init__.py,sha256=PnKCgjcTq44LaAMzB-7--B2FdewRrE8F_vjZeaG9NhA,3921 +requests/__pycache__/__version__.cpython-38.pyc,, +requests/__pycache__/utils.cpython-38.pyc,, +requests/__version__.py,sha256=Bm-GFstQaFezsFlnmEMrJDe8JNROz9n2XXYtODdvjjc,436 +requests/utils.py,sha256=LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A,30049 diff --git a/syft/cataloger/python/test-fixtures/egg-info/top_level.txt b/syft/cataloger/python/test-fixtures/egg-info/top_level.txt new file mode 100644 index 000000000..663bd1f6a --- /dev/null +++ b/syft/cataloger/python/test-fixtures/egg-info/top_level.txt @@ -0,0 +1 @@ +requests \ No newline at end of file diff --git a/syft/cataloger/python/test-fixtures/partial.dist-info/METADATA b/syft/cataloger/python/test-fixtures/partial.dist-info/METADATA new file mode 100644 index 000000000..924780dfd --- /dev/null +++ b/syft/cataloger/python/test-fixtures/partial.dist-info/METADATA @@ -0,0 +1,47 @@ +Metadata-Version: 2.1 +Name: Pygments +Version: 2.6.1 +Summary: Pygments is a syntax highlighting package written in Python. +Home-page: https://pygments.org/ +Author: Georg Brandl +Author-email: georg@python.org +License: BSD License +Keywords: syntax highlighting +Platform: any +Classifier: License :: OSI Approved :: BSD License +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: End Users/Desktop +Classifier: Intended Audience :: System Administrators +Classifier: Development Status :: 6 - Mature +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Operating System :: OS Independent +Classifier: Topic :: Text Processing :: Filters +Classifier: Topic :: Utilities +Requires-Python: >=3.5 + + +Pygments +~~~~~~~~ + +Pygments is a syntax highlighting package written in Python. + +It is a generic syntax highlighter suitable for use in code hosting, forums, +wikis or other applications that need to prettify source code. Highlights +are: + +* a wide range of over 500 languages and other text formats is supported +* special attention is paid to details, increasing quality by a fair amount +* support for new languages and formats are added easily +* a number of output formats, presently HTML, LaTeX, RTF, SVG, all image formats that PIL supports and ANSI sequences +* it is usable as a command-line tool and as a library + +:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. +:license: BSD, see LICENSE for details. + diff --git a/syft/cataloger/python/test-fixtures/partial.dist-info/top_level.txt b/syft/cataloger/python/test-fixtures/partial.dist-info/top_level.txt new file mode 100644 index 000000000..e69de29bb diff --git a/syft/cataloger/rpmdb/parse_rpmdb.go b/syft/cataloger/rpmdb/parse_rpmdb.go index 1fcf5e12a..efebae2a4 100644 --- a/syft/cataloger/rpmdb/parse_rpmdb.go +++ b/syft/cataloger/rpmdb/parse_rpmdb.go @@ -52,8 +52,9 @@ func parseRpmDB(_ string, reader io.Reader) ([]pkg.Package, error) { Name: entry.Name, Version: fmt.Sprintf("%s-%s", entry.Version, entry.Release), // this is what engine does //Version: fmt.Sprintf("%d:%s-%s.%s", entry.Epoch, entry.Version, entry.Release, entry.Arch), - Type: pkg.RpmPkg, - Metadata: pkg.RpmMetadata{ + Type: pkg.RpmPkg, + MetadataType: pkg.RpmdbMetadataType, + Metadata: pkg.RpmdbMetadata{ Name: entry.Name, Version: entry.Version, Epoch: entry.Epoch, diff --git a/syft/cataloger/rpmdb/parse_rpmdb_test.go b/syft/cataloger/rpmdb/parse_rpmdb_test.go index 2a4100f94..bf81f111f 100644 --- a/syft/cataloger/rpmdb/parse_rpmdb_test.go +++ b/syft/cataloger/rpmdb/parse_rpmdb_test.go @@ -1,19 +1,21 @@ package rpmdb import ( - "github.com/anchore/syft/syft/pkg" - "github.com/go-test/deep" "os" "testing" + + "github.com/anchore/syft/syft/pkg" + "github.com/go-test/deep" ) func TestParseRpmDB(t *testing.T) { expected := map[string]pkg.Package{ "dive": { - Name: "dive", - Version: "0.9.2-1", - Type: pkg.RpmPkg, - Metadata: pkg.RpmMetadata{ + Name: "dive", + Version: "0.9.2-1", + Type: pkg.RpmPkg, + MetadataType: pkg.RpmdbMetadataType, + Metadata: pkg.RpmdbMetadata{ Name: "dive", Epoch: 0, Arch: "x86_64", diff --git a/syft/cataloger/ruby/parse_gemspec.go b/syft/cataloger/ruby/parse_gemspec.go index 7fe310265..e8f33f56d 100644 --- a/syft/cataloger/ruby/parse_gemspec.go +++ b/syft/cataloger/ruby/parse_gemspec.go @@ -96,12 +96,13 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { } pkgs = append(pkgs, pkg.Package{ - Name: metadata.Name, - Version: metadata.Version, - Licenses: metadata.Licenses, - Language: pkg.Ruby, - Type: pkg.GemPkg, - Metadata: metadata, + Name: metadata.Name, + Version: metadata.Version, + Licenses: metadata.Licenses, + Language: pkg.Ruby, + Type: pkg.GemPkg, + MetadataType: pkg.GemMetadataType, + Metadata: metadata, }) } diff --git a/syft/cataloger/ruby/parse_gemspec_test.go b/syft/cataloger/ruby/parse_gemspec_test.go index 47bc9fc44..2a32ae0b3 100644 --- a/syft/cataloger/ruby/parse_gemspec_test.go +++ b/syft/cataloger/ruby/parse_gemspec_test.go @@ -10,11 +10,12 @@ import ( func TestParseGemspec(t *testing.T) { var expectedPkg = pkg.Package{ - Name: "bundler", - Version: "2.1.4", - Type: pkg.GemPkg, - Licenses: []string{"MIT"}, - Language: pkg.Ruby, + Name: "bundler", + Version: "2.1.4", + Type: pkg.GemPkg, + Licenses: []string{"MIT"}, + Language: pkg.Ruby, + MetadataType: pkg.GemMetadataType, Metadata: pkg.GemMetadata{ Name: "bundler", Version: "2.1.4", diff --git a/syft/pkg/metadata.go b/syft/pkg/metadata.go new file mode 100644 index 000000000..c1e402934 --- /dev/null +++ b/syft/pkg/metadata.go @@ -0,0 +1,14 @@ +package pkg + +type MetadataType string + +const ( + UnknownMetadataType MetadataType = "UnknownMetadata" + ApkMetadataType MetadataType = "apk-metadata" + DpkgMetadataType MetadataType = "dpkg-metadata" + GemMetadataType MetadataType = "gem-metadata" + JavaMetadataType MetadataType = "java-metadata" + NpmPackageJSONMetadataType MetadataType = "npm-package-json-metadata" + RpmdbMetadataType MetadataType = "rpmdb-metadata" + PythonPackageMetadataType MetadataType = "python-package-metadata" +) diff --git a/syft/pkg/npm_metadata.go b/syft/pkg/npm_metadata.go index af683120b..6df2152d0 100644 --- a/syft/pkg/npm_metadata.go +++ b/syft/pkg/npm_metadata.go @@ -1,7 +1,7 @@ package pkg -// NpmMetadata holds extra information that is used in pkg.Package -type NpmMetadata struct { +// NpmPackageJSONMetadata holds extra information that is used in pkg.Package +type NpmPackageJSONMetadata struct { Name string `mapstructure:"name" json:"name"` Version string `mapstructure:"version" json:"version"` Files []string `mapstructure:"files" json:"files"` diff --git a/syft/pkg/package.go b/syft/pkg/package.go index c84b26fb5..c3224a07f 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -23,10 +23,11 @@ type Package struct { FoundBy string `json:"foundBy"` // the specific cataloger that discovered this package Source []file.Reference `json:"sources"` // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) // TODO: should we move licenses into metadata? - Licenses []string `json:"licenses"` // licenses discovered with the package metadata - Language Language `json:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) - Type Type `json:"type"` // the package type (e.g. Npm, Yarn, Egg, Wheel, Rpm, Deb, etc) - Metadata interface{} `json:"metadata,omitempty"` // additional data found while parsing the package source + Licenses []string `json:"licenses"` // licenses discovered with the package metadata + Language Language `json:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) + Type Type `json:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) + MetadataType MetadataType `json:"metadataType"` // the shape of the additional data in the "metadata" field + Metadata interface{} `json:"metadata,omitempty"` // additional data found while parsing the package source } // ID returns the package ID, which is unique relative to a package catalog. diff --git a/syft/pkg/package_test.go b/syft/pkg/package_test.go index 1fec05b4b..471fe06f7 100644 --- a/syft/pkg/package_test.go +++ b/syft/pkg/package_test.go @@ -25,7 +25,7 @@ func TestPackage_pURL(t *testing.T) { pkg: Package{ Name: "name", Version: "v0.1.0", - Type: WheelPkg, + Type: PythonPkg, }, expected: "pkg:pypi/name@v0.1.0", }, @@ -33,7 +33,7 @@ func TestPackage_pURL(t *testing.T) { pkg: Package{ Name: "name", Version: "v0.1.0", - Type: EggPkg, + Type: PythonPkg, }, expected: "pkg:pypi/name@v0.1.0", }, @@ -41,7 +41,7 @@ func TestPackage_pURL(t *testing.T) { pkg: Package{ Name: "name", Version: "v0.1.0", - Type: PythonSetupPkg, + Type: PythonPkg, }, expected: "pkg:pypi/name@v0.1.0", }, @@ -49,7 +49,7 @@ func TestPackage_pURL(t *testing.T) { pkg: Package{ Name: "name", Version: "v0.1.0", - Type: PythonRequirementsPkg, + Type: PythonPkg, }, expected: "pkg:pypi/name@v0.1.0", }, @@ -93,7 +93,7 @@ func TestPackage_pURL(t *testing.T) { Name: "bad-name", Version: "bad-v0.1.0", Type: RpmPkg, - Metadata: RpmMetadata{ + Metadata: RpmdbMetadata{ Name: "name", Version: "v0.1.0", Epoch: 2, diff --git a/syft/pkg/python_package_metadata.go b/syft/pkg/python_package_metadata.go new file mode 100644 index 000000000..637e6220c --- /dev/null +++ b/syft/pkg/python_package_metadata.go @@ -0,0 +1,26 @@ +package pkg + +type Digest struct { + Algorithm string `json:"algorithm"` + Value string `json:"value"` +} + +// PythonFileRecord represents a single entry within a RECORD file for a python wheel or egg package +type PythonFileRecord struct { + Path string `json:"path"` + Digest *Digest `json:"digest,omitempty"` + Size string `json:"size,omitempty"` +} + +// PythonPackageMetadata represents all captured data for a python egg or wheel package. +type PythonPackageMetadata struct { + Name string `json:"name" mapstruct:"Name"` + Version string `json:"version" mapstruct:"Version"` + License string `json:"license" mapstruct:"License"` + Author string `json:"author" mapstruct:"Author"` + AuthorEmail string `json:"authorEmail" mapstruct:"Authoremail"` + Platform string `json:"platform" mapstruct:"Platform"` + Files []PythonFileRecord `json:"files,omitempty"` + SitePackagesRootPath string `json:"sitePackagesRootPath"` + TopLevelPackages []string `json:"topLevelPackages,omitempty"` +} diff --git a/syft/pkg/rpm_metadata.go b/syft/pkg/rpmdb_metadata.go similarity index 80% rename from syft/pkg/rpm_metadata.go rename to syft/pkg/rpmdb_metadata.go index fed840d1b..85b7d9bda 100644 --- a/syft/pkg/rpm_metadata.go +++ b/syft/pkg/rpmdb_metadata.go @@ -7,8 +7,8 @@ import ( "github.com/package-url/packageurl-go" ) -// RpmMetadata represents all captured data for a RPM DB package entry. -type RpmMetadata struct { +// RpmdbMetadata represents all captured data for a RPM DB package entry. +type RpmdbMetadata struct { Name string `json:"name"` Version string `json:"version"` Epoch int `json:"epoch"` @@ -20,7 +20,7 @@ type RpmMetadata struct { Vendor string `json:"vendor"` } -func (m RpmMetadata) PackageURL(d distro.Distro) string { +func (m RpmdbMetadata) PackageURL(d distro.Distro) string { pURL := packageurl.NewPackageURL( packageurl.TypeRPM, d.Type.String(), diff --git a/syft/pkg/rpm_metadata_test.go b/syft/pkg/rpmdb_metadata_test.go similarity index 92% rename from syft/pkg/rpm_metadata_test.go rename to syft/pkg/rpmdb_metadata_test.go index 8b1c99d81..a38eaec05 100644 --- a/syft/pkg/rpm_metadata_test.go +++ b/syft/pkg/rpmdb_metadata_test.go @@ -1,22 +1,23 @@ package pkg import ( + "testing" + "github.com/anchore/syft/syft/distro" "github.com/sergi/go-diff/diffmatchpatch" - "testing" ) func TestRpmMetadata_pURL(t *testing.T) { tests := []struct { distro distro.Distro - metadata RpmMetadata + metadata RpmdbMetadata expected string }{ { distro: distro.Distro{ Type: distro.CentOS, }, - metadata: RpmMetadata{ + metadata: RpmdbMetadata{ Name: "p", Version: "v", Arch: "a", @@ -29,7 +30,7 @@ func TestRpmMetadata_pURL(t *testing.T) { distro: distro.Distro{ Type: distro.RedHat, }, - metadata: RpmMetadata{ + metadata: RpmdbMetadata{ Name: "p", Version: "v", Arch: "a", diff --git a/syft/pkg/type.go b/syft/pkg/type.go index 3d7ff672b..85dc83159 100644 --- a/syft/pkg/type.go +++ b/syft/pkg/type.go @@ -6,32 +6,25 @@ import "github.com/package-url/packageurl-go" type Type string const ( - UnknownPkg Type = "UnknownPackage" - ApkPkg Type = "apk" - GemPkg Type = "gem" - DebPkg Type = "deb" - EggPkg Type = "egg" - RpmPkg Type = "rpm" - WheelPkg Type = "wheel" - PoetryPkg Type = "poetry" - NpmPkg Type = "npm" - PythonRequirementsPkg Type = "python-requirements" - PythonSetupPkg Type = "python-setup" - JavaPkg Type = "java-archive" - JenkinsPluginPkg Type = "jenkins-plugin" - GoModulePkg Type = "go-module" + UnknownPkg Type = "UnknownPackage" + ApkPkg Type = "apk" + GemPkg Type = "gem" + DebPkg Type = "deb" + RpmPkg Type = "rpm" + NpmPkg Type = "npm" + PythonPkg Type = "python" + JavaPkg Type = "java-archive" + JenkinsPluginPkg Type = "jenkins-plugin" + GoModulePkg Type = "go-module" ) var AllPkgs = []Type{ ApkPkg, GemPkg, DebPkg, - EggPkg, RpmPkg, - WheelPkg, NpmPkg, - PythonRequirementsPkg, - PythonSetupPkg, + PythonPkg, JavaPkg, JenkinsPluginPkg, GoModulePkg, @@ -45,7 +38,7 @@ func (t Type) PackageURLType() string { return packageurl.TypeGem case DebPkg: return "deb" - case EggPkg, WheelPkg, PythonRequirementsPkg, PythonSetupPkg: + case PythonPkg: return packageurl.TypePyPi case NpmPkg: return packageurl.TypeNPM diff --git a/syft/presenter/cyclonedx/presenter_test.go b/syft/presenter/cyclonedx/presenter_test.go index 2b3536866..d600d5309 100644 --- a/syft/presenter/cyclonedx/presenter_test.go +++ b/syft/presenter/cyclonedx/presenter_test.go @@ -3,11 +3,12 @@ package cyclonedx import ( "bytes" "flag" - "github.com/anchore/stereoscope/pkg/imagetest" - "github.com/anchore/syft/syft/distro" "regexp" "testing" + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/distro" + "github.com/anchore/go-testutils" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/pkg" @@ -109,7 +110,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) { }, Type: pkg.RpmPkg, FoundBy: "the-cataloger-1", - Metadata: pkg.RpmMetadata{ + Metadata: pkg.RpmdbMetadata{ Name: "package1", Epoch: 0, Arch: "x86_64", @@ -133,7 +134,7 @@ func TestCycloneDxImgsPresenter(t *testing.T) { "MIT", "Apache-v2", }, - Metadata: pkg.RpmMetadata{ + Metadata: pkg.RpmdbMetadata{ Name: "package2", Epoch: 0, Arch: "x86_64", diff --git a/syft/scope/resolver.go b/syft/scope/resolver.go index 74a20783a..d32740f3b 100644 --- a/syft/scope/resolver.go +++ b/syft/scope/resolver.go @@ -23,8 +23,13 @@ type ContentResolver interface { // FileResolver knows how to get file.References for given string paths and globs type FileResolver interface { + // FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches) FilesByPath(paths ...file.Path) ([]file.Reference, error) + // FilesByGlob fetches a set of file references which the given glob matches FilesByGlob(patterns ...string) ([]file.Reference, error) + // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. + // This is helpful when attempting to find a file that is in the same layer or lower as another file. + RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error) } // getImageResolver returns the appropriate resolve for a container image given the scope option diff --git a/syft/scope/resolvers/all_layers_resolver.go b/syft/scope/resolvers/all_layers_resolver.go index 757d8129f..049d4a19d 100644 --- a/syft/scope/resolvers/all_layers_resolver.go +++ b/syft/scope/resolvers/all_layers_resolver.go @@ -109,6 +109,15 @@ func (r *AllLayersResolver) FilesByGlob(patterns ...string) ([]file.Reference, e return uniqueFiles, nil } +func (r *AllLayersResolver) RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error) { + entry, err := r.img.FileCatalog.Get(reference) + if err != nil { + return nil, err + } + + return entry.Source.SquashedTree.File(file.Path(path)), nil +} + // MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a // file.Reference is a path relative to a particular layer. func (r *AllLayersResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { diff --git a/syft/scope/resolvers/directory_resolver.go b/syft/scope/resolvers/directory_resolver.go index 54db895bc..3d8d4ed07 100644 --- a/syft/scope/resolvers/directory_resolver.go +++ b/syft/scope/resolvers/directory_resolver.go @@ -5,6 +5,7 @@ import ( "io/ioutil" "os" "path" + "path/filepath" "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal/log" @@ -18,7 +19,7 @@ type DirectoryResolver struct { // Stringer to represent a directory path data source func (s DirectoryResolver) String() string { - return fmt.Sprintf("dir://%s", s.Path) + return fmt.Sprintf("dir:%s", s.Path) } // FilesByPath returns all file.References that match the given paths from the directory. @@ -26,15 +27,19 @@ func (s DirectoryResolver) FilesByPath(userPaths ...file.Path) ([]file.Reference var references = make([]file.Reference, 0) for _, userPath := range userPaths { - resolvedPath := path.Join(s.Path, string(userPath)) - _, err := os.Stat(resolvedPath) + userStrPath := string(userPath) + + if filepath.IsAbs(userStrPath) { + // a path relative to root should be prefixed with the resolvers directory path, otherwise it should be left as is + userStrPath = path.Join(s.Path, userStrPath) + } + _, err := os.Stat(userStrPath) if os.IsNotExist(err) { continue } else if err != nil { - log.Errorf("path (%s) is not valid: %v", resolvedPath, err) + log.Errorf("path (%s) is not valid: %v", userStrPath, err) } - filePath := file.Path(resolvedPath) - references = append(references, file.NewFileReference(filePath)) + references = append(references, file.NewFileReference(file.Path(userStrPath))) } return references, nil @@ -75,6 +80,18 @@ func (s DirectoryResolver) FilesByGlob(patterns ...string) ([]file.Reference, er return result, nil } +func (s *DirectoryResolver) RelativeFileByPath(_ file.Reference, path string) (*file.Reference, error) { + paths, err := s.FilesByPath(file.Path(path)) + if err != nil { + return nil, err + } + if len(paths) == 0 { + return nil, nil + } + + return &paths[0], nil +} + // MultipleFileContentsByRef returns the file contents for all file.References relative a directory. func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { refContents := make(map[file.Reference]string) @@ -91,10 +108,10 @@ func (s DirectoryResolver) MultipleFileContentsByRef(f ...file.Reference) (map[f // FileContentsByRef fetches file contents for a single file reference relative to a directory. // If the path does not exist an error is returned. -func (s DirectoryResolver) FileContentsByRef(ref file.Reference) (string, error) { - contents, err := fileContents(ref.Path) +func (s DirectoryResolver) FileContentsByRef(reference file.Reference) (string, error) { + contents, err := fileContents(reference.Path) if err != nil { - return "", fmt.Errorf("could not read contents of file: %s", ref.Path) + return "", fmt.Errorf("could not read contents of file: %s", reference.Path) } return string(contents), nil diff --git a/syft/scope/resolvers/directory_resolver_test.go b/syft/scope/resolvers/directory_resolver_test.go index c70f8a4ff..5f2b93e19 100644 --- a/syft/scope/resolvers/directory_resolver_test.go +++ b/syft/scope/resolvers/directory_resolver_test.go @@ -1,7 +1,6 @@ package resolvers import ( - "path" "testing" "github.com/anchore/stereoscope/pkg/file" @@ -10,24 +9,49 @@ import ( func TestDirectoryResolver_FilesByPath(t *testing.T) { cases := []struct { name string + root string input string + expected string refCount int }{ { - name: "finds a file", - input: "image-symlinks/file-1.txt", + name: "finds a file (relative)", + root: "./test-fixtures/", + input: "test-fixtures/image-symlinks/file-1.txt", + expected: "test-fixtures/image-symlinks/file-1.txt", refCount: 1, }, { - name: "managed non-existing files", - input: "image-symlinks/bogus.txt", + name: "finds a file with relative indirection", + root: "./test-fixtures/../test-fixtures", + input: "test-fixtures/image-symlinks/file-1.txt", + expected: "test-fixtures/image-symlinks/file-1.txt", + refCount: 1, + }, + { + // note: this is asserting the old behavior is not supported + name: "relative lookup with wrong path fails", + root: "./test-fixtures/", + input: "image-symlinks/file-1.txt", refCount: 0, }, + { + name: "managed non-existing files (relative)", + root: "./test-fixtures/", + input: "test-fixtures/image-symlinks/bogus.txt", + refCount: 0, + }, + { + name: "finds a file (absolute)", + root: "./test-fixtures/", + input: "/image-symlinks/file-1.txt", + expected: "test-fixtures/image-symlinks/file-1.txt", + refCount: 1, + }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures"} - expected := path.Join("test-fixtures", c.input) + resolver := DirectoryResolver{c.root} refs, err := resolver.FilesByPath(file.Path(c.input)) if err != nil { t.Fatalf("could not use resolver: %+v, %+v", err, refs) @@ -38,8 +62,8 @@ func TestDirectoryResolver_FilesByPath(t *testing.T) { } for _, actual := range refs { - if actual.Path != file.Path(expected) { - t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.input) + if actual.Path != file.Path(c.expected) { + t.Errorf("bad resolve path: '%s'!='%s'", actual.Path, c.expected) } } }) @@ -54,17 +78,17 @@ func TestDirectoryResolver_MultipleFilesByPath(t *testing.T) { }{ { name: "finds multiple files", - input: []file.Path{file.Path("image-symlinks/file-1.txt"), file.Path("image-symlinks/file-2.txt")}, + input: []file.Path{file.Path("test-fixtures/image-symlinks/file-1.txt"), file.Path("test-fixtures/image-symlinks/file-2.txt")}, refCount: 2, }, { name: "skips non-existing files", - input: []file.Path{file.Path("image-symlinks/bogus.txt"), file.Path("image-symlinks/file-1.txt")}, + input: []file.Path{file.Path("test-fixtures/image-symlinks/bogus.txt"), file.Path("test-fixtures/image-symlinks/file-1.txt")}, refCount: 1, }, { name: "does not return anything for non-existing directories", - input: []file.Path{file.Path("non-existing/bogus.txt"), file.Path("non-existing/file-1.txt")}, + input: []file.Path{file.Path("test-fixtures/non-existing/bogus.txt"), file.Path("test-fixtures/non-existing/file-1.txt")}, refCount: 0, }, } @@ -93,17 +117,17 @@ func TestDirectoryResolver_MultipleFileContentsByRef(t *testing.T) { }{ { name: "gets multiple file contents", - input: []file.Path{file.Path("image-symlinks/file-1.txt"), file.Path("image-symlinks/file-2.txt")}, + input: []file.Path{file.Path("test-fixtures/image-symlinks/file-1.txt"), file.Path("test-fixtures/image-symlinks/file-2.txt")}, refCount: 2, }, { name: "skips non-existing files", - input: []file.Path{file.Path("image-symlinks/bogus.txt"), file.Path("image-symlinks/file-1.txt")}, + input: []file.Path{file.Path("test-fixtures/image-symlinks/bogus.txt"), file.Path("test-fixtures/image-symlinks/file-1.txt")}, refCount: 1, }, { name: "does not return anything for non-existing directories", - input: []file.Path{file.Path("non-existing/bogus.txt"), file.Path("non-existing/file-1.txt")}, + input: []file.Path{file.Path("test-fixtures/non-existing/bogus.txt"), file.Path("test-fixtures/non-existing/file-1.txt")}, refCount: 0, }, } diff --git a/syft/scope/resolvers/image_squash_resolver.go b/syft/scope/resolvers/image_squash_resolver.go index 429adff34..1e398f0ac 100644 --- a/syft/scope/resolvers/image_squash_resolver.go +++ b/syft/scope/resolvers/image_squash_resolver.go @@ -73,6 +73,18 @@ func (r *ImageSquashResolver) FilesByGlob(patterns ...string) ([]file.Reference, return uniqueFiles, nil } +func (r *ImageSquashResolver) RelativeFileByPath(reference file.Reference, path string) (*file.Reference, error) { + paths, err := r.FilesByPath(file.Path(path)) + if err != nil { + return nil, err + } + if len(paths) == 0 { + return nil, nil + } + + return &paths[0], nil +} + // MultipleFileContentsByRef returns the file contents for all file.References relative to the image. Note that a // file.Reference is a path relative to a particular layer, in this case only from the squashed representation. func (r *ImageSquashResolver) MultipleFileContentsByRef(f ...file.Reference) (map[file.Reference]string, error) { diff --git a/syft/scope/scope_test.go b/syft/scope/scope_test.go index deb55c1d5..1597ed7f2 100644 --- a/syft/scope/scope_test.go +++ b/syft/scope/scope_test.go @@ -61,13 +61,13 @@ func TestDirectoryScope(t *testing.T) { { desc: "path detected", input: "test-fixtures", - inputPaths: []file.Path{file.Path("path-detected")}, + inputPaths: []file.Path{file.Path("test-fixtures/path-detected")}, expRefs: 1, }, { desc: "no files-by-path detected", input: "test-fixtures", - inputPaths: []file.Path{file.Path("no-path-detected")}, + inputPaths: []file.Path{file.Path("test-fixtures/no-path-detected")}, expRefs: 0, }, } @@ -105,13 +105,13 @@ func TestMultipleFileContentsByRefContents(t *testing.T) { { input: "test-fixtures/path-detected", desc: "empty file", - path: "empty", + path: "test-fixtures/path-detected/empty", expected: "", }, { input: "test-fixtures/path-detected", desc: "file has contents", - path: ".vimrc", + path: "test-fixtures/path-detected/.vimrc", expected: "\" A .vimrc file\n", }, } @@ -127,7 +127,7 @@ func TestMultipleFileContentsByRefContents(t *testing.T) { } if len(refs) != 1 { - t.Errorf("expected a single ref to be generated but got: %d", len(refs)) + t.Fatalf("expected a single ref to be generated but got: %d", len(refs)) } ref := refs[0] diff --git a/test/integration/pkg_cases_test.go b/test/integration/pkg_cases_test.go index 3eb83e45e..26727d688 100644 --- a/test/integration/pkg_cases_test.go +++ b/test/integration/pkg_cases_test.go @@ -26,6 +26,17 @@ var imageOnlyTestCases = []testCase{ "npm": "6.14.6", }, }, + { + name: "find python egg & wheel packages", + pkgType: pkg.PythonPkg, + pkgLanguage: pkg.Python, + pkgInfo: map[string]string{ + "Pygments": "2.6.1", + "requests": "2.22.0", + "somerequests": "3.22.0", + "someotherpkg": "3.19.0", + }, + }, } var dirOnlyTestCases = []testCase{ @@ -96,6 +107,26 @@ var dirOnlyTestCases = []testCase{ "get-stdin": "8.0.0", }, }, + { + name: "find python requirements.txt & setup.py package references", + pkgType: pkg.PythonPkg, + pkgLanguage: pkg.Python, + pkgInfo: map[string]string{ + // dir specific test cases + "flask": "4.0.0", + "python-dateutil": "2.8.1", + "python-swiftclient": "3.8.1", + "pytz": "2019.3", + "jsonschema": "2.6.0", + "passlib": "1.7.2", + "mypy": "v0.770", + // common to image and directory + "Pygments": "2.6.1", + "requests": "2.22.0", + "somerequests": "3.22.0", + "someotherpkg": "3.19.0", + }, + }, } var commonTestCases = []testCase{ @@ -131,46 +162,6 @@ var commonTestCases = []testCase{ "example-jenkins-plugin": "1.0-SNAPSHOT", }, }, - { - name: "find python wheel packages", - pkgType: pkg.WheelPkg, - pkgLanguage: pkg.Python, - pkgInfo: map[string]string{ - "Pygments": "2.6.1", - "requests": "2.10.0", - }, - }, - { - name: "find python egg packages", - pkgType: pkg.EggPkg, - pkgLanguage: pkg.Python, - pkgInfo: map[string]string{ - "requests": "2.22.0", - "otherpkg": "2.19.0", - }, - }, - { - name: "find python requirements.txt packages", - pkgType: pkg.PythonRequirementsPkg, - pkgLanguage: pkg.Python, - pkgInfo: map[string]string{ - "flask": "4.0.0", - "python-dateutil": "2.8.1", - "python-swiftclient": "3.8.1", - "pytz": "2019.3", - "jsonschema": "2.6.0", - "passlib": "1.7.2", - "pathlib": "1.0.1", - }, - }, - { - name: "find python setup.py packages", - pkgType: pkg.PythonSetupPkg, - pkgLanguage: pkg.Python, - pkgInfo: map[string]string{ - "mypy": "v0.770", - }, - }, { name: "find apkdb packages", diff --git a/test/integration/pkg_coverage_test.go b/test/integration/pkg_coverage_test.go index 974421a96..13a5af513 100644 --- a/test/integration/pkg_coverage_test.go +++ b/test/integration/pkg_coverage_test.go @@ -68,6 +68,7 @@ func TestPkgCoverageImage(t *testing.T) { } if pkgCount != len(c.pkgInfo) { + t.Logf("Discovered packages of type %+v", c.pkgType) for a := range catalog.Enumerate(c.pkgType) { t.Log(" ", a) } diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/RECORD b/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/RECORD new file mode 100644 index 000000000..af233f390 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/RECORD @@ -0,0 +1,5 @@ +../../../bin/pygmentize,sha256=dDhv_U2jiCpmFQwIRHpFRLAHUO4R1jIJPEvT_QYTFp8,220 +Pygments-2.6.1.dist-info/AUTHORS,sha256=PVpa2_Oku6BGuiUvutvuPnWGpzxqFy2I8-NIrqCvqUY,8449 +Pygments-2.6.1.dist-info/RECORD,, +pygments/__pycache__/__init__.cpython-38.pyc,, +pygments/util.py,sha256=586xXHiJGGZxqk5PMBu3vBhE68DLuAe5MBARWrSPGxA,10778 \ No newline at end of file diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/top_level.txt b/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/top_level.txt new file mode 100644 index 000000000..1e09fdb90 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/python/dist-info/top_level.txt @@ -0,0 +1 @@ +top-level-pkg \ No newline at end of file diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/egg-info/top_level.txt b/test/integration/test-fixtures/image-pkg-coverage/python/egg-info/top_level.txt new file mode 100644 index 000000000..1e09fdb90 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/python/egg-info/top_level.txt @@ -0,0 +1 @@ +top-level-pkg \ No newline at end of file diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/requires/requirements-dev.txt b/test/integration/test-fixtures/image-pkg-coverage/python/requires/requirements-dev.txt index 4b9365fe9..16c74e8b5 100644 --- a/test/integration/test-fixtures/image-pkg-coverage/python/requires/requirements-dev.txt +++ b/test/integration/test-fixtures/image-pkg-coverage/python/requires/requirements-dev.txt @@ -1,3 +1,2 @@ jsonschema==2.6.0 -passlib==1.7.2 -pathlib==1.0.1 \ No newline at end of file +passlib==1.7.2 \ No newline at end of file diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/otherpkg-2.19.0-py3.8.egg-info/PKG-INFO b/test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/PKG-INFO similarity index 82% rename from test/integration/test-fixtures/image-pkg-coverage/python/otherpkg-2.19.0-py3.8.egg-info/PKG-INFO rename to test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/PKG-INFO index 05a01e1c8..6d4758610 100644 --- a/test/integration/test-fixtures/image-pkg-coverage/python/otherpkg-2.19.0-py3.8.egg-info/PKG-INFO +++ b/test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 -Name: otherpkg -Version: 2.19.0 +Name: someotherpkg +Version: 3.19.0 Summary: Python HTTP for Humans. Home-page: http://python-requests.org Author: Kenneth Reitz diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/top_level.txt b/test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/top_level.txt new file mode 100644 index 000000000..1e09fdb90 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/python/someotherpkg-3.19.0-py3.8.egg-info/top_level.txt @@ -0,0 +1 @@ +top-level-pkg \ No newline at end of file diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/requests-2.10.0.dist-info/METADATA b/test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/METADATA similarity index 96% rename from test/integration/test-fixtures/image-pkg-coverage/python/requests-2.10.0.dist-info/METADATA rename to test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/METADATA index cef498e16..ddc02219b 100644 --- a/test/integration/test-fixtures/image-pkg-coverage/python/requests-2.10.0.dist-info/METADATA +++ b/test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 -Name: requests -Version: 2.10.0 +Name: somerequests +Version: 3.22.0 Summary: stuff Home-page: stuff Author: Georg Brandl diff --git a/test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/top_level.txt b/test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/top_level.txt new file mode 100644 index 000000000..1e09fdb90 --- /dev/null +++ b/test/integration/test-fixtures/image-pkg-coverage/python/somerequests-3.22.0.dist-info/top_level.txt @@ -0,0 +1 @@ +top-level-pkg \ No newline at end of file