From 1076281566ed0c2d35b45a702204f4f40b619faf Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 4 Nov 2022 08:59:03 -0400 Subject: [PATCH] port python cataloger to new generic cataloger pattern (#1319) Signed-off-by: Alex Goodman Signed-off-by: Alex Goodman --- syft/pkg/cataloger/python/cataloger.go | 26 +++++ ...ge_cataloger_test.go => cataloger_test.go} | 29 +++-- syft/pkg/cataloger/python/index_cataloger.go | 20 ---- syft/pkg/cataloger/python/package.go | 81 ++++++++++++++ syft/pkg/cataloger/python/package_test.go | 46 ++++++++ .../cataloger/python/parse_pipfile_lock.go | 29 ++--- .../python/parse_pipfile_lock_test.go | 73 ++++++------- .../pkg/cataloger/python/parse_poetry_lock.go | 31 ++++-- .../python/parse_poetry_lock_test.go | 71 ++++++------- .../cataloger/python/parse_requirements.go | 24 ++--- .../python/parse_requirements_test.go | 61 +++++------ syft/pkg/cataloger/python/parse_setup.go | 28 ++--- syft/pkg/cataloger/python/parse_setup_test.go | 81 +++++++------- ...ackage_cataloger.go => parse_wheel_egg.go} | 100 +++++------------- syft/pkg/cataloger/python/poetry_metadata.go | 18 ---- .../python/poetry_metadata_package.go | 21 ---- syft/pkg/python_package_metadata.go | 39 +------ syft/pkg/python_package_metadata_test.go | 49 --------- syft/pkg/url_test.go | 14 +-- 19 files changed, 381 insertions(+), 460 deletions(-) create mode 100644 syft/pkg/cataloger/python/cataloger.go rename syft/pkg/cataloger/python/{package_cataloger_test.go => cataloger_test.go} (92%) delete mode 100644 syft/pkg/cataloger/python/index_cataloger.go create mode 100644 syft/pkg/cataloger/python/package.go create mode 100644 syft/pkg/cataloger/python/package_test.go rename syft/pkg/cataloger/python/{package_cataloger.go => parse_wheel_egg.go} (65%) delete mode 100644 syft/pkg/cataloger/python/poetry_metadata.go delete mode 100644 syft/pkg/cataloger/python/poetry_metadata_package.go diff --git a/syft/pkg/cataloger/python/cataloger.go b/syft/pkg/cataloger/python/cataloger.go new file mode 100644 index 000000000..cd2ba358b --- /dev/null +++ b/syft/pkg/cataloger/python/cataloger.go @@ -0,0 +1,26 @@ +package python + +import ( + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +const ( + eggMetadataGlob = "**/*egg-info/PKG-INFO" + eggFileMetadataGlob = "**/*.egg-info" + wheelMetadataGlob = "**/*dist-info/METADATA" +) + +// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. +func NewPythonIndexCataloger() *generic.Cataloger { + return generic.NewCataloger("python-index-cataloger"). + WithParserByGlobs(parseRequirementsTxt, "**/*requirements*.txt"). + WithParserByGlobs(parsePoetryLock, "**/poetry.lock"). + WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock"). + WithParserByGlobs(parseSetup, "**/setup.py") +} + +// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. +func NewPythonPackageCataloger() *generic.Cataloger { + return generic.NewCataloger("python-package-cataloger"). + WithParserByGlobs(parseWheelOrEgg, eggMetadataGlob, eggFileMetadataGlob, wheelMetadataGlob) +} diff --git a/syft/pkg/cataloger/python/package_cataloger_test.go b/syft/pkg/cataloger/python/cataloger_test.go similarity index 92% rename from syft/pkg/cataloger/python/package_cataloger_test.go rename to syft/pkg/cataloger/python/cataloger_test.go index b83860931..0c02ac36f 100644 --- a/syft/pkg/cataloger/python/package_cataloger_test.go +++ b/syft/pkg/cataloger/python/cataloger_test.go @@ -3,13 +3,12 @@ package python import ( "testing" - "github.com/go-test/deep" - "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" ) -func TestPythonPackageWheelCataloger(t *testing.T) { +func Test_PackageCataloger(t *testing.T) { tests := []struct { name string fixtures []string @@ -20,6 +19,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { fixtures: []string{"test-fixtures/no-version-py3.8.egg-info"}, expectedPackage: pkg.Package{ Name: "no-version", + PURL: "pkg:pypi/no-version", Type: pkg.PythonPkg, Language: pkg.Python, FoundBy: "python-package-cataloger", @@ -40,6 +40,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { expectedPackage: pkg.Package{ Name: "requests", Version: "2.22.0", + PURL: "pkg:pypi/requests@2.22.0", Type: pkg.PythonPkg, Language: pkg.Python, Licenses: []string{"Apache 2.0"}, @@ -76,6 +77,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { expectedPackage: pkg.Package{ Name: "Pygments", Version: "2.6.1", + PURL: "pkg:pypi/Pygments@2.6.1?vcs_url=git+https://github.com/python-test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", Type: pkg.PythonPkg, Language: pkg.Python, Licenses: []string{"BSD License"}, @@ -112,6 +114,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { expectedPackage: pkg.Package{ Name: "Pygments", Version: "2.6.1", + PURL: "pkg:pypi/Pygments@2.6.1", Type: pkg.PythonPkg, Language: pkg.Python, Licenses: []string{"BSD License"}, @@ -142,6 +145,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { expectedPackage: pkg.Package{ Name: "Pygments", Version: "2.6.1", + PURL: "pkg:pypi/Pygments@2.6.1", Type: pkg.PythonPkg, Language: pkg.Python, Licenses: []string{"BSD License"}, @@ -164,6 +168,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) { expectedPackage: pkg.Package{ Name: "requests", Version: "2.22.0", + PURL: "pkg:pypi/requests@2.22.0", Type: pkg.PythonPkg, Language: pkg.Python, Licenses: []string{"Apache 2.0"}, @@ -193,23 +198,15 @@ func TestPythonPackageWheelCataloger(t *testing.T) { test.expectedPackage.Locations = source.NewLocationSet(locations...) - actual, _, err := NewPythonPackageCataloger().Catalog(resolver) - if err != nil { - t.Fatalf("failed to catalog python package: %+v", err) - } - - if len(actual) != 1 { - t.Fatalf("unexpected number of packages: %d", len(actual)) - } - - for _, d := range deep.Equal(test.expectedPackage, actual[0]) { - t.Errorf("diff: %+v", d) - } + pkgtest.NewCatalogTester(). + WithResolver(resolver). + Expects([]pkg.Package{test.expectedPackage}, nil). + TestCataloger(t, NewPythonPackageCataloger()) }) } } -func TestIgnorePackage(t *testing.T) { +func Test_PackageCataloger_IgnorePackage(t *testing.T) { tests := []struct { MetadataFixture string }{ diff --git a/syft/pkg/cataloger/python/index_cataloger.go b/syft/pkg/cataloger/python/index_cataloger.go deleted file mode 100644 index 4ebfe408e..000000000 --- a/syft/pkg/cataloger/python/index_cataloger.go +++ /dev/null @@ -1,20 +0,0 @@ -/* -Package python provides a concrete Cataloger implementation for Python ecosystem files (egg, wheel, requirements.txt). -*/ -package python - -import ( - "github.com/anchore/syft/syft/pkg/cataloger/common" -) - -// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. -func NewPythonIndexCataloger() *common.GenericCataloger { - globParsers := map[string]common.ParserFn{ - "**/*requirements*.txt": parseRequirementsTxt, - "**/poetry.lock": parsePoetryLock, - "**/Pipfile.lock": parsePipfileLock, - "**/setup.py": parseSetup, - } - - return common.NewGenericCataloger(nil, globParsers, "python-index-cataloger") -} diff --git a/syft/pkg/cataloger/python/package.go b/syft/pkg/cataloger/python/package.go new file mode 100644 index 000000000..dea6a4a13 --- /dev/null +++ b/syft/pkg/cataloger/python/package.go @@ -0,0 +1,81 @@ +package python + +import ( + "fmt" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +func newPackageForIndex(name, version string, locations ...source.Location) pkg.Package { + p := pkg.Package{ + Name: name, + Version: version, + Locations: source.NewLocationSet(locations...), + PURL: packageURL(name, version, nil), + Language: pkg.Python, + Type: pkg.PythonPkg, + } + + p.SetID() + + return p +} + +func newPackageForPackage(m pkg.PythonPackageMetadata, sources ...source.Location) pkg.Package { + var licenses []string + if m.License != "" { + licenses = []string{m.License} + } + + p := pkg.Package{ + Name: m.Name, + Version: m.Version, + PURL: packageURL(m.Name, m.Version, &m), + Locations: source.NewLocationSet(sources...), + Licenses: licenses, + Language: pkg.Python, + Type: pkg.PythonPkg, + MetadataType: pkg.PythonPackageMetadataType, + Metadata: m, + } + + p.SetID() + return p +} + +func packageURL(name, version string, m *pkg.PythonPackageMetadata) string { + // generate a purl from the package data + pURL := packageurl.NewPackageURL( + packageurl.TypePyPi, + "", + name, + version, + purlQualifiersForPackage(m), + "") + + return pURL.ToString() +} + +func purlQualifiersForPackage(m *pkg.PythonPackageMetadata) packageurl.Qualifiers { + q := packageurl.Qualifiers{} + if m == nil { + return q + } + if m.DirectURLOrigin != nil { + q = append(q, vcsURLQualifierForPackage(m.DirectURLOrigin)...) + } + return q +} + +func vcsURLQualifierForPackage(p *pkg.PythonDirectURLOriginInfo) packageurl.Qualifiers { + if p == nil || p.VCS == "" { + return nil + } + // Taken from https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs + // packageurl-go still doesn't support all qualifier names + return packageurl.Qualifiers{ + {Key: pkg.PURLQualifierVCSURL, Value: fmt.Sprintf("%s+%s@%s", p.VCS, p.URL, p.CommitID)}, + } +} diff --git a/syft/pkg/cataloger/python/package_test.go b/syft/pkg/cataloger/python/package_test.go new file mode 100644 index 000000000..5792661dd --- /dev/null +++ b/syft/pkg/cataloger/python/package_test.go @@ -0,0 +1,46 @@ +package python + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/anchore/syft/syft/pkg" +) + +func Test_packageURL(t *testing.T) { + tests := []struct { + testName string + name string + version string + metadata *pkg.PythonPackageMetadata + want string + }{ + { + testName: "without metadata", + name: "name", + version: "v0.1.0", + want: "pkg:pypi/name@v0.1.0", + }, + { + testName: "with vcs info", + name: "name", + version: "v0.1.0", + metadata: &pkg.PythonPackageMetadata{ + Name: "bogus", // note: ignored + Version: "v0.2.0", // note: ignored + DirectURLOrigin: &pkg.PythonDirectURLOriginInfo{ + VCS: "git", + URL: "https://github.com/test/test.git", + CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + }, + want: "pkg:pypi/name@v0.1.0?vcs_url=git+https://github.com/test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + } + for _, tt := range tests { + t.Run(tt.testName, func(t *testing.T) { + assert.Equal(t, tt.want, packageURL(tt.name, tt.version, tt.metadata)) + }) + } +} diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock.go b/syft/pkg/cataloger/python/parse_pipfile_lock.go index e1790ce1b..6c19755c8 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock.go @@ -4,15 +4,15 @@ import ( "encoding/json" "fmt" "io" - "sort" "strings" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" ) -type PipfileLock struct { +type pipfileLock struct { Meta struct { Hash struct { Sha256 string `json:"sha256"` @@ -35,16 +35,15 @@ type Dependency struct { Version string `json:"version"` } -// integrity check -var _ common.ParserFn = parsePipfileLock +var _ generic.Parser = parsePipfileLock // parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered. -func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { - packages := make([]*pkg.Package, 0) +func parsePipfileLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + pkgs := make([]pkg.Package, 0) dec := json.NewDecoder(reader) for { - var lock PipfileLock + var lock pipfileLock if err := dec.Decode(&lock); err == io.EOF { break } else if err != nil { @@ -52,19 +51,11 @@ func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Re } for name, pkgMeta := range lock.Default { version := strings.TrimPrefix(pkgMeta.Version, "==") - packages = append(packages, &pkg.Package{ - Name: name, - Version: version, - Language: pkg.Python, - Type: pkg.PythonPkg, - }) + pkgs = append(pkgs, newPackageForIndex(name, version, reader.Location)) } } - // Without sorting the packages slice, the order of packages will be unstable, due to ranging over a map. - sort.Slice(packages, func(i, j int) bool { - return packages[i].String() < packages[j].String() - }) + pkg.Sort(pkgs) - return packages, nil, nil + return pkgs, nil, nil } diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go index 42495a1f8..7b9a2e1fc 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go @@ -1,62 +1,55 @@ package python import ( - "os" "testing" - "github.com/google/go-cmp/cmp" - + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" ) func TestParsePipFileLock(t *testing.T) { - expected := []*pkg.Package{ + + fixture := "test-fixtures/pipfile-lock/Pipfile.lock" + locations := source.NewLocationSet(source.NewLocation(fixture)) + expectedPkgs := []pkg.Package{ { - Name: "aio-pika", - Version: "6.8.0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "aio-pika", + Version: "6.8.0", + PURL: "pkg:pypi/aio-pika@6.8.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "aiodns", - Version: "2.0.0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "aiodns", + Version: "2.0.0", + PURL: "pkg:pypi/aiodns@2.0.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "aiohttp", - Version: "3.7.4.post0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "aiohttp", + Version: "3.7.4.post0", + PURL: "pkg:pypi/aiohttp@3.7.4.post0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "aiohttp-jinja2", - Version: "1.4.2", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "aiohttp-jinja2", + Version: "1.4.2", + PURL: "pkg:pypi/aiohttp-jinja2@1.4.2", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, } - fixture, err := os.Open("test-fixtures/pipfile-lock/Pipfile.lock") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } + // TODO: relationships are not under test + var expectedRelationships []artifact.Relationship - // TODO: no relationships are under test yet - actual, _, err := parsePipfileLock(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse requirements: %+v", err) - } - - if diff := cmp.Diff(expected, actual, - cmp.AllowUnexported(pkg.Package{}), - cmp.Comparer( - func(x, y source.LocationSet) bool { - return cmp.Equal(x.ToSlice(), y.ToSlice()) - }, - ), - ); diff != "" { - t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) - } + pkgtest.TestFileParser(t, fixture, parsePipfileLock, expectedPkgs, expectedRelationships) } diff --git a/syft/pkg/cataloger/python/parse_poetry_lock.go b/syft/pkg/cataloger/python/parse_poetry_lock.go index cee921f4c..52e33e618 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock.go @@ -2,30 +2,45 @@ package python import ( "fmt" - "io" "github.com/pelletier/go-toml" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" ) // integrity check -var _ common.ParserFn = parsePoetryLock +var _ generic.Parser = parsePoetryLock + +type poetryMetadata struct { + Packages []struct { + Name string `toml:"name"` + Version string `toml:"version"` + Category string `toml:"category"` + Description string `toml:"description"` + Optional bool `toml:"optional"` + } `toml:"package"` +} // parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered. -func parsePoetryLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { +func parsePoetryLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { tree, err := toml.LoadReader(reader) if err != nil { - return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %v", err) + return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %w", err) } - metadata := PoetryMetadata{} + metadata := poetryMetadata{} err = tree.Unmarshal(&metadata) if err != nil { - return nil, nil, fmt.Errorf("unable to parse poetry.lock: %v", err) + return nil, nil, fmt.Errorf("unable to parse poetry.lock: %w", err) } - return metadata.Pkgs(), nil, nil + var pkgs []pkg.Package + for _, p := range metadata.Packages { + pkgs = append(pkgs, newPackageForIndex(p.Name, p.Version, reader.Location)) + } + + return pkgs, nil, nil } diff --git a/syft/pkg/cataloger/python/parse_poetry_lock_test.go b/syft/pkg/cataloger/python/parse_poetry_lock_test.go index 3abdf6061..0a3478e1b 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock_test.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock_test.go @@ -1,59 +1,54 @@ package python import ( - "os" "testing" - "github.com/go-test/deep" - + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/source" ) func TestParsePoetryLock(t *testing.T) { - expected := []*pkg.Package{ + fixture := "test-fixtures/poetry/poetry.lock" + locations := source.NewLocationSet(source.NewLocation(fixture)) + expectedPkgs := []pkg.Package{ { - Name: "added-value", - Version: "0.14.2", - Language: pkg.Python, - Type: pkg.PythonPkg, - Licenses: nil, + Name: "added-value", + Version: "0.14.2", + PURL: "pkg:pypi/added-value@0.14.2", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "alabaster", - Version: "0.7.12", - Language: pkg.Python, - Type: pkg.PythonPkg, - Licenses: nil, + Name: "alabaster", + Version: "0.7.12", + PURL: "pkg:pypi/alabaster@0.7.12", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "appnope", - Version: "0.1.0", - Language: pkg.Python, - Type: pkg.PythonPkg, - Licenses: nil, + Name: "appnope", + Version: "0.1.0", + PURL: "pkg:pypi/appnope@0.1.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "asciitree", - Version: "0.3.3", - Language: pkg.Python, - Type: pkg.PythonPkg, - Licenses: nil, + Name: "asciitree", + Version: "0.3.3", + PURL: "pkg:pypi/asciitree@0.3.3", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, } - fixture, err := os.Open("test-fixtures/poetry/poetry.lock") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } + // TODO: relationships are not under test + var expectedRelationships []artifact.Relationship - // TODO: no relationships are under test yet - actual, _, err := parsePoetryLock(fixture.Name(), fixture) - if err != nil { - t.Error(err) - } - - differences := deep.Equal(expected, actual) - if differences != nil { - t.Errorf("returned package list differed from expectation: %+v", differences) - } + pkgtest.TestFileParser(t, fixture, parsePoetryLock, expectedPkgs, expectedRelationships) } diff --git a/syft/pkg/cataloger/python/parse_requirements.go b/syft/pkg/cataloger/python/parse_requirements.go index 62443bcfa..bdc532554 100644 --- a/syft/pkg/cataloger/python/parse_requirements.go +++ b/syft/pkg/cataloger/python/parse_requirements.go @@ -3,21 +3,21 @@ package python import ( "bufio" "fmt" - "io" "strings" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" ) -// integrity check -var _ common.ParserFn = parseRequirementsTxt +var _ generic.Parser = parseRequirementsTxt // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // specific version. -func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { - packages := make([]*pkg.Package, 0) +func parseRequirementsTxt(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + var packages []pkg.Package scanner := bufio.NewScanner(reader) for scanner.Scan() { @@ -42,14 +42,14 @@ func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifac // parse a new requirement parts := strings.Split(line, "==") + if len(parts) < 2 { + // this should never happen, but just in case + log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) + continue + } name := strings.TrimSpace(parts[0]) version := strings.TrimSpace(parts[1]) - packages = append(packages, &pkg.Package{ - Name: name, - Version: version, - Language: pkg.Python, - Type: pkg.PythonPkg, - }) + packages = append(packages, newPackageForIndex(name, version, reader.Location)) } if err := scanner.Err(); err != nil { diff --git a/syft/pkg/cataloger/python/parse_requirements_test.go b/syft/pkg/cataloger/python/parse_requirements_test.go index 9d60f9575..1224ebf9c 100644 --- a/syft/pkg/cataloger/python/parse_requirements_test.go +++ b/syft/pkg/cataloger/python/parse_requirements_test.go @@ -1,56 +1,45 @@ package python import ( - "os" "testing" - "github.com/google/go-cmp/cmp" - + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" ) func TestParseRequirementsTxt(t *testing.T) { - expected := []*pkg.Package{ + fixture := "test-fixtures/requires/requirements.txt" + locations := source.NewLocationSet(source.NewLocation(fixture)) + expectedPkgs := []pkg.Package{ { - Name: "flask", - Version: "4.0.0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "flask", + Version: "4.0.0", + PURL: "pkg:pypi/flask@4.0.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "foo", - Version: "1.0.0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "foo", + Version: "1.0.0", + PURL: "pkg:pypi/foo@1.0.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "SomeProject", - Version: "5.4", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "SomeProject", + Version: "5.4", + PURL: "pkg:pypi/SomeProject@5.4", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, } - fixture, err := os.Open("test-fixtures/requires/requirements.txt") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } + var expectedRelationships []artifact.Relationship - // TODO: no relationships are under test yet - actual, _, err := parseRequirementsTxt(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse requirements: %+v", err) - } - - if diff := cmp.Diff(expected, actual, - cmp.AllowUnexported(pkg.Package{}), - cmp.Comparer( - func(x, y source.LocationSet) bool { - return cmp.Equal(x.ToSlice(), y.ToSlice()) - }, - ), - ); diff != "" { - t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) - } + pkgtest.TestFileParser(t, fixture, parseRequirementsTxt, expectedPkgs, expectedRelationships) } diff --git a/syft/pkg/cataloger/python/parse_setup.go b/syft/pkg/cataloger/python/parse_setup.go index c3943ff1e..1aa542aed 100644 --- a/syft/pkg/cataloger/python/parse_setup.go +++ b/syft/pkg/cataloger/python/parse_setup.go @@ -2,27 +2,28 @@ package python import ( "bufio" - "io" "regexp" "strings" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/pkg/cataloger/common" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" ) // integrity check -var _ common.ParserFn = parseSetup +var _ generic.Parser = parseSetup // match examples: // // 'pathlib3==2.2.0;python_version<"3.6"' --> match(name=pathlib3 version=2.2.0) // "mypy==v0.770", --> match(name=mypy version=v0.770) // " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770) -var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w\.]*)`) +var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`) -func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { - packages := make([]*pkg.Package, 0) +func parseSetup(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + var packages []pkg.Package scanner := bufio.NewScanner(reader) @@ -37,14 +38,17 @@ func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relation } name := strings.Trim(parts[0], "'\"") name = strings.TrimSpace(name) + name = strings.Trim(name, "'\"") version := strings.TrimSpace(parts[len(parts)-1]) - packages = append(packages, &pkg.Package{ - Name: strings.Trim(name, "'\""), - Version: strings.Trim(version, "'\""), - Language: pkg.Python, - Type: pkg.PythonPkg, - }) + version = strings.Trim(version, "'\"") + + if name == "" || version == "" { + log.WithFields("path", reader.RealPath).Warnf("unable to parse package in setup.py line: %q", line) + continue + } + + packages = append(packages, newPackageForIndex(name, version, reader.Location)) } } diff --git a/syft/pkg/cataloger/python/parse_setup_test.go b/syft/pkg/cataloger/python/parse_setup_test.go index 685af622c..96deaa4fe 100644 --- a/syft/pkg/cataloger/python/parse_setup_test.go +++ b/syft/pkg/cataloger/python/parse_setup_test.go @@ -1,68 +1,61 @@ package python import ( - "os" "testing" - "github.com/google/go-cmp/cmp" - + "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" ) func TestParseSetup(t *testing.T) { - expected := []*pkg.Package{ + fixture := "test-fixtures/setup/setup.py" + locations := source.NewLocationSet(source.NewLocation(fixture)) + expectedPkgs := []pkg.Package{ { - Name: "pathlib3", - Version: "2.2.0", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "pathlib3", + Version: "2.2.0", + PURL: "pkg:pypi/pathlib3@2.2.0", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "mypy", - Version: "v0.770", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "mypy", + Version: "v0.770", + PURL: "pkg:pypi/mypy@v0.770", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "mypy1", - Version: "v0.770", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "mypy1", + Version: "v0.770", + PURL: "pkg:pypi/mypy1@v0.770", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "mypy2", - Version: "v0.770", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "mypy2", + Version: "v0.770", + PURL: "pkg:pypi/mypy2@v0.770", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, { - Name: "mypy3", - Version: "v0.770", - Language: pkg.Python, - Type: pkg.PythonPkg, + Name: "mypy3", + Version: "v0.770", + PURL: "pkg:pypi/mypy3@v0.770", + Locations: locations, + Language: pkg.Python, + Type: pkg.PythonPkg, }, } - fixture, err := os.Open("test-fixtures/setup/setup.py") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } + var expectedRelationships []artifact.Relationship - // TODO: no relationships are under test yet - actual, _, err := parseSetup(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse requirements: %+v", err) - } - - if diff := cmp.Diff(expected, actual, - cmp.AllowUnexported(pkg.Package{}), - cmp.Comparer( - func(x, y source.LocationSet) bool { - return cmp.Equal(x.ToSlice(), y.ToSlice()) - }, - ), - ); diff != "" { - t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff) - } + pkgtest.TestFileParser(t, fixture, parseSetup, expectedPkgs, expectedRelationships) } diff --git a/syft/pkg/cataloger/python/package_cataloger.go b/syft/pkg/cataloger/python/parse_wheel_egg.go similarity index 65% rename from syft/pkg/cataloger/python/package_cataloger.go rename to syft/pkg/cataloger/python/parse_wheel_egg.go index 4526a69fa..1ccf4664b 100644 --- a/syft/pkg/cataloger/python/package_cataloger.go +++ b/syft/pkg/cataloger/python/parse_wheel_egg.go @@ -11,89 +11,33 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/source" ) -const ( - eggMetadataGlob = "**/*egg-info/PKG-INFO" - eggFileMetadataGlob = "**/*.egg-info" - wheelMetadataGlob = "**/*dist-info/METADATA" -) - -type PackageCataloger struct{} - -// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. -func NewPythonPackageCataloger() *PackageCataloger { - return &PackageCataloger{} -} - -// Name returns a string that uniquely describes a cataloger -func (c *PackageCataloger) Name() string { - return "python-package-cataloger" -} - -// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations. -func (c *PackageCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) { - var fileMatches []source.Location - - for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob, eggFileMetadataGlob} { - matches, err := resolver.FilesByGlob(glob) - if err != nil { - return nil, nil, fmt.Errorf("failed to find files by glob: %s", glob) - } - fileMatches = append(fileMatches, matches...) - } - - var pkgs []pkg.Package - for _, location := range fileMatches { - p, err := c.catalogEggOrWheel(resolver, location) - if err != nil { - return nil, nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.RealPath, err) - } - if pkg.IsValid(p) { - pkgs = append(pkgs, *p) - } - } - return pkgs, nil, nil -} - -// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. -func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metadataLocation source.Location) (*pkg.Package, error) { - metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataLocation) +// parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents. +func parseWheelOrEgg(resolver source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + metadata, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location) if err != nil { - return nil, err + return nil, nil, err + } + if metadata == nil { + return nil, nil, nil } // This can happen for Python 2.7 where it is reported from an egg-info, but Python is // the actual runtime, it isn't a "package". The special-casing here allows to skip it if metadata.Name == "Python" { - return nil, nil + return nil, nil, nil } - var licenses []string - if metadata.License != "" { - licenses = []string{metadata.License} - } + pkgs := []pkg.Package{newPackageForPackage(*metadata, sources...)} - p := &pkg.Package{ - Name: metadata.Name, - Version: metadata.Version, - FoundBy: c.Name(), - Locations: source.NewLocationSet(sources...), - Licenses: licenses, - Language: pkg.Python, - Type: pkg.PythonPkg, - MetadataType: pkg.PythonPackageMetadataType, - Metadata: *metadata, - } - - p.SetID() - - return p, nil + return pkgs, nil, nil } // fetchRecordFiles finds a corresponding installed-files.txt file for the given python package metadata file and returns the set of file records contained. -func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) { +func fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer). @@ -124,7 +68,7 @@ func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, met } // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. -func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) { +func fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). @@ -151,7 +95,7 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metada } // fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. -func (c *PackageCataloger) fetchTopLevelPackages(resolver source.FileResolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) { +func fetchTopLevelPackages(resolver source.FileResolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) { // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages parentDir := filepath.Dir(metadataLocation.RealPath) topLevelPath := filepath.Join(parentDir, "top_level.txt") @@ -181,7 +125,7 @@ func (c *PackageCataloger) fetchTopLevelPackages(resolver source.FileResolver, m return pkgs, sources, nil } -func (c *PackageCataloger) fetchDirectURLData(resolver source.FileResolver, metadataLocation source.Location) (d *pkg.PythonDirectURLOriginInfo, sources []source.Location, err error) { +func fetchDirectURLData(resolver source.FileResolver, metadataLocation source.Location) (d *pkg.PythonDirectURLOriginInfo, sources []source.Location, err error) { parentDir := filepath.Dir(metadataLocation.RealPath) directURLPath := filepath.Join(parentDir, "direct_url.json") directURLLocation := resolver.RelativeFileByPath(metadataLocation, directURLPath) @@ -216,7 +160,7 @@ func (c *PackageCataloger) fetchDirectURLData(resolver source.FileResolver, meta } // assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. -func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) { +func assembleEggOrWheelMetadata(resolver source.FileResolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) { var sources = []source.Location{metadataLocation} metadataContents, err := resolver.FileContentsByLocation(metadataLocation) @@ -230,13 +174,17 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv return nil, nil, err } + if metadata.Name == "" { + return nil, nil, nil + } + // attach any python files found for the given wheel/egg installation - r, s, err := c.fetchRecordFiles(resolver, metadataLocation) + r, s, err := fetchRecordFiles(resolver, metadataLocation) if err != nil { return nil, nil, err } if len(r) == 0 { - r, s, err = c.fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath) + r, s, err = fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath) if err != nil { return nil, nil, err } @@ -246,7 +194,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv metadata.Files = r // attach any top-level package names found for the given wheel/egg installation - p, s, err := c.fetchTopLevelPackages(resolver, metadataLocation) + p, s, err := fetchTopLevelPackages(resolver, metadataLocation) if err != nil { return nil, nil, err } @@ -254,7 +202,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv metadata.TopLevelPackages = p // attach any direct-url package data found for the given wheel/egg installation - d, s, err := c.fetchDirectURLData(resolver, metadataLocation) + d, s, err := fetchDirectURLData(resolver, metadataLocation) if err != nil { return nil, nil, err } diff --git a/syft/pkg/cataloger/python/poetry_metadata.go b/syft/pkg/cataloger/python/poetry_metadata.go deleted file mode 100644 index f75180566..000000000 --- a/syft/pkg/cataloger/python/poetry_metadata.go +++ /dev/null @@ -1,18 +0,0 @@ -package python - -import "github.com/anchore/syft/syft/pkg" - -type PoetryMetadata struct { - Packages []PoetryMetadataPackage `toml:"package"` -} - -// Pkgs returns all of the packages referenced within the poetry.lock metadata. -func (m PoetryMetadata) Pkgs() []*pkg.Package { - pkgs := make([]*pkg.Package, 0) - - for _, p := range m.Packages { - pkgs = append(pkgs, p.Pkg()) - } - - return pkgs -} diff --git a/syft/pkg/cataloger/python/poetry_metadata_package.go b/syft/pkg/cataloger/python/poetry_metadata_package.go deleted file mode 100644 index db75c19f4..000000000 --- a/syft/pkg/cataloger/python/poetry_metadata_package.go +++ /dev/null @@ -1,21 +0,0 @@ -package python - -import "github.com/anchore/syft/syft/pkg" - -type PoetryMetadataPackage struct { - Name string `toml:"name"` - Version string `toml:"version"` - Category string `toml:"category"` - Description string `toml:"description"` - Optional bool `toml:"optional"` -} - -// Pkg returns the standard `pkg.Package` representation of the package referenced within the poetry.lock metadata. -func (p PoetryMetadataPackage) Pkg() *pkg.Package { - return &pkg.Package{ - Name: p.Name, - Version: p.Version, - Language: pkg.Python, - Type: pkg.PythonPkg, - } -} diff --git a/syft/pkg/python_package_metadata.go b/syft/pkg/python_package_metadata.go index cacfb5ab4..97a8ab806 100644 --- a/syft/pkg/python_package_metadata.go +++ b/syft/pkg/python_package_metadata.go @@ -1,19 +1,12 @@ package pkg import ( - "fmt" "sort" "github.com/scylladb/go-set/strset" - - "github.com/anchore/packageurl-go" - "github.com/anchore/syft/syft/linux" ) -var ( - _ FileOwner = (*PythonPackageMetadata)(nil) - _ urlIdentifier = (*PythonPackageMetadata)(nil) -) +var _ FileOwner = (*PythonPackageMetadata)(nil) // PythonFileDigest represents the file metadata for a single file attributed to a python package. type PythonFileDigest struct { @@ -80,33 +73,3 @@ func (m PythonPackageMetadata) OwnedFiles() (result []string) { sort.Strings(result) return result } - -func (m PythonPackageMetadata) PackageURL(_ *linux.Release) string { - // generate a purl from the package data - pURL := packageurl.NewPackageURL( - packageurl.TypePyPi, - "", - m.Name, - m.Version, - m.purlQualifiers(), - "") - - return pURL.ToString() -} - -func (m PythonPackageMetadata) purlQualifiers() packageurl.Qualifiers { - q := packageurl.Qualifiers{} - if m.DirectURLOrigin != nil { - q = append(q, m.DirectURLOrigin.vcsURLQualifier()...) - } - return q -} - -func (p PythonDirectURLOriginInfo) vcsURLQualifier() packageurl.Qualifiers { - if p.VCS != "" { - // Taken from https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs - // packageurl-go still doesn't support all qualifier names - return packageurl.Qualifiers{{Key: PURLQualifierVCSURL, Value: fmt.Sprintf("%s+%s@%s", p.VCS, p.URL, p.CommitID)}} - } - return nil -} diff --git a/syft/pkg/python_package_metadata_test.go b/syft/pkg/python_package_metadata_test.go index 4798ef373..686980305 100644 --- a/syft/pkg/python_package_metadata_test.go +++ b/syft/pkg/python_package_metadata_test.go @@ -5,57 +5,8 @@ import ( "testing" "github.com/go-test/deep" - "github.com/sergi/go-diff/diffmatchpatch" - - "github.com/anchore/syft/syft/linux" ) -func TestPythonPackageMetadata_pURL(t *testing.T) { - tests := []struct { - name string - distro *linux.Release - metadata PythonPackageMetadata - expected string - }{ - { - name: "with vcs info", - metadata: PythonPackageMetadata{ - Name: "name", - Version: "v0.1.0", - DirectURLOrigin: &PythonDirectURLOriginInfo{ - VCS: "git", - URL: "https://github.com/test/test.git", - CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - }, - }, - expected: "pkg:pypi/name@v0.1.0?vcs_url=git+https://github.com/test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - }, - { - name: "should not respond to release info", - distro: &linux.Release{ - ID: "rhel", - VersionID: "8.4", - }, - metadata: PythonPackageMetadata{ - Name: "name", - Version: "v0.1.0", - }, - expected: "pkg:pypi/name@v0.1.0", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - actual := test.metadata.PackageURL(test.distro) - if actual != test.expected { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(test.expected, actual, true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) - } - }) - } -} - func TestPythonMetadata_FileOwner(t *testing.T) { tests := []struct { metadata PythonPackageMetadata diff --git a/syft/pkg/url_test.go b/syft/pkg/url_test.go index 2b66bc3da..e06169820 100644 --- a/syft/pkg/url_test.go +++ b/syft/pkg/url_test.go @@ -17,19 +17,6 @@ func TestPackageURL(t *testing.T) { distro *linux.Release expected string }{ - { - name: "python", - pkg: Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: PythonPkg, - Metadata: PythonPackageMetadata{ - Name: "name", - Version: "v0.1.0", - }, - }, - expected: "pkg:pypi/name@v0.1.0", - }, { name: "gem", pkg: Package{ @@ -140,6 +127,7 @@ func TestPackageURL(t *testing.T) { expectedTypes.Remove(string(HackagePkg)) expectedTypes.Remove(string(BinaryPkg)) expectedTypes.Remove(string(PhpComposerPkg)) + expectedTypes.Remove(string(PythonPkg)) for _, test := range tests { t.Run(test.name, func(t *testing.T) {