port python cataloger to new generic cataloger pattern (#1319)

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-11-04 08:59:03 -04:00 committed by GitHub
parent 2deb96a801
commit 1076281566
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 381 additions and 460 deletions

View File

@ -0,0 +1,26 @@
package python
import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// Glob patterns used to locate the metadata files of installed python eggs and wheels.
const (
// eggMetadataGlob matches PKG-INFO files located in a directory whose name ends in "egg-info".
eggMetadataGlob = "**/*egg-info/PKG-INFO"
// eggFileMetadataGlob matches any path whose final element ends in ".egg-info".
eggFileMetadataGlob = "**/*.egg-info"
// wheelMetadataGlob matches METADATA files located in a directory whose name ends in "dist-info".
wheelMetadataGlob = "**/*dist-info/METADATA"
)
// NewPythonIndexCataloger builds the "python-index-cataloger": a generic cataloger that discovers python packages
// referenced from requirements files, poetry.lock files, Pipfile.lock files, and setup.py files.
func NewPythonIndexCataloger() *generic.Cataloger {
	cataloger := generic.NewCataloger("python-index-cataloger")

	// each registration pairs a parser with the glob that selects the files it is run against
	cataloger = cataloger.WithParserByGlobs(parseRequirementsTxt, "**/*requirements*.txt")
	cataloger = cataloger.WithParserByGlobs(parsePoetryLock, "**/poetry.lock")
	cataloger = cataloger.WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock")
	cataloger = cataloger.WithParserByGlobs(parseSetup, "**/setup.py")

	return cataloger
}
// NewPythonPackageCataloger builds the "python-package-cataloger": a generic cataloger that runs parseWheelOrEgg
// against files matched by the egg and wheel metadata globs (egg or wheel installation directories).
func NewPythonPackageCataloger() *generic.Cataloger {
	cataloger := generic.NewCataloger("python-package-cataloger")

	return cataloger.WithParserByGlobs(
		parseWheelOrEgg,
		eggMetadataGlob,
		eggFileMetadataGlob,
		wheelMetadataGlob,
	)
}

View File

@ -3,13 +3,12 @@ package python
import (
"testing"
"github.com/go-test/deep"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)
func TestPythonPackageWheelCataloger(t *testing.T) {
func Test_PackageCataloger(t *testing.T) {
tests := []struct {
name string
fixtures []string
@ -20,6 +19,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
fixtures: []string{"test-fixtures/no-version-py3.8.egg-info"},
expectedPackage: pkg.Package{
Name: "no-version",
PURL: "pkg:pypi/no-version",
Type: pkg.PythonPkg,
Language: pkg.Python,
FoundBy: "python-package-cataloger",
@ -40,6 +40,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
expectedPackage: pkg.Package{
Name: "requests",
Version: "2.22.0",
PURL: "pkg:pypi/requests@2.22.0",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"Apache 2.0"},
@ -76,6 +77,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
expectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
PURL: "pkg:pypi/Pygments@2.6.1?vcs_url=git+https://github.com/python-test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
@ -112,6 +114,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
expectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
PURL: "pkg:pypi/Pygments@2.6.1",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
@ -142,6 +145,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
expectedPackage: pkg.Package{
Name: "Pygments",
Version: "2.6.1",
PURL: "pkg:pypi/Pygments@2.6.1",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"BSD License"},
@ -164,6 +168,7 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
expectedPackage: pkg.Package{
Name: "requests",
Version: "2.22.0",
PURL: "pkg:pypi/requests@2.22.0",
Type: pkg.PythonPkg,
Language: pkg.Python,
Licenses: []string{"Apache 2.0"},
@ -193,23 +198,15 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
test.expectedPackage.Locations = source.NewLocationSet(locations...)
actual, _, err := NewPythonPackageCataloger().Catalog(resolver)
if err != nil {
t.Fatalf("failed to catalog python package: %+v", err)
}
if len(actual) != 1 {
t.Fatalf("unexpected number of packages: %d", len(actual))
}
for _, d := range deep.Equal(test.expectedPackage, actual[0]) {
t.Errorf("diff: %+v", d)
}
pkgtest.NewCatalogTester().
WithResolver(resolver).
Expects([]pkg.Package{test.expectedPackage}, nil).
TestCataloger(t, NewPythonPackageCataloger())
})
}
}
func TestIgnorePackage(t *testing.T) {
func Test_PackageCataloger_IgnorePackage(t *testing.T) {
tests := []struct {
MetadataFixture string
}{

View File

@ -1,20 +0,0 @@
/*
Package python provides a concrete Cataloger implementation for Python ecosystem files (egg, wheel, requirements.txt).
*/
package python
import (
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
// NewPythonIndexCataloger builds the "python-index-cataloger": a generic cataloger that discovers python packages
// referenced from requirements files, poetry.lock files, Pipfile.lock files, and setup.py files.
func NewPythonIndexCataloger() *common.GenericCataloger {
	// map each file glob to the parser that understands the matching files
	parsersByGlob := make(map[string]common.ParserFn)
	parsersByGlob["**/*requirements*.txt"] = parseRequirementsTxt
	parsersByGlob["**/poetry.lock"] = parsePoetryLock
	parsersByGlob["**/Pipfile.lock"] = parsePipfileLock
	parsersByGlob["**/setup.py"] = parseSetup

	// only the glob-keyed parsers are supplied; the first argument is left nil
	return common.NewGenericCataloger(nil, parsersByGlob, "python-index-cataloger")
}

View File

@ -0,0 +1,81 @@
package python
import (
"fmt"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// newPackageForIndex assembles a python package from a bare name/version pair and the locations it was found at.
// The purl is generated without any package metadata (nil is passed to packageURL). SetID is invoked once all
// fields have been populated.
func newPackageForIndex(name, version string, locations ...source.Location) pkg.Package {
	var result pkg.Package

	result.Name = name
	result.Version = version
	result.Locations = source.NewLocationSet(locations...)
	result.PURL = packageURL(name, version, nil)
	result.Language = pkg.Python
	result.Type = pkg.PythonPkg

	result.SetID()

	return result
}
// newPackageForPackage assembles a python package from parsed egg/wheel metadata and the locations that metadata
// was derived from. The name, version, and purl all come from the given metadata, which is also attached to the
// package as-is. Licenses holds the single metadata license when it is non-empty, and is left nil otherwise.
// SetID is invoked once all fields have been populated.
func newPackageForPackage(m pkg.PythonPackageMetadata, sources ...source.Location) pkg.Package {
	result := pkg.Package{
		Name:         m.Name,
		Version:      m.Version,
		PURL:         packageURL(m.Name, m.Version, &m),
		Locations:    source.NewLocationSet(sources...),
		Language:     pkg.Python,
		Type:         pkg.PythonPkg,
		MetadataType: pkg.PythonPackageMetadataType,
		Metadata:     m,
	}

	// only record a license when the metadata actually declares one
	if m.License != "" {
		result.Licenses = []string{m.License}
	}

	result.SetID()

	return result
}
// packageURL renders a pypi-type package URL string for the given name and version. The optional metadata (may be
// nil) only contributes qualifiers; its own Name and Version fields are not consulted here.
func packageURL(name, version string, m *pkg.PythonPackageMetadata) string {
	// neither a namespace nor a subpath is set on the generated purl
	const (
		noNamespace = ""
		noSubpath   = ""
	)

	qualifiers := purlQualifiersForPackage(m)
	purl := packageurl.NewPackageURL(packageurl.TypePyPi, noNamespace, name, version, qualifiers, noSubpath)

	return purl.ToString()
}
// purlQualifiersForPackage collects the purl qualifiers derivable from the given metadata. The result is always a
// non-nil (possibly empty) qualifier list; qualifiers are only added when the metadata is present and carries
// direct URL origin information.
func purlQualifiersForPackage(m *pkg.PythonPackageMetadata) packageurl.Qualifiers {
	qualifiers := packageurl.Qualifiers{}

	if m != nil && m.DirectURLOrigin != nil {
		qualifiers = append(qualifiers, vcsURLQualifierForPackage(m.DirectURLOrigin)...)
	}

	return qualifiers
}
// vcsURLQualifierForPackage returns a single "vcs_url" purl qualifier built from the given direct URL origin info,
// in the form "<vcs>+<url>[@<commit>]". It returns nil when no origin info is given or when no VCS is recorded.
// The "@<commit>" suffix is only appended when a commit ID is present, so that a missing commit does not leave a
// dangling "@" at the end of the qualifier value.
func vcsURLQualifierForPackage(p *pkg.PythonDirectURLOriginInfo) packageurl.Qualifiers {
	if p == nil || p.VCS == "" {
		return nil
	}

	vcsURL := fmt.Sprintf("%s+%s", p.VCS, p.URL)
	if p.CommitID != "" {
		// the revision portion of a vcs_url is optional; only emit it when known
		vcsURL += "@" + p.CommitID
	}

	// Taken from https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs
	// packageurl-go still doesn't support all qualifier names
	return packageurl.Qualifiers{
		{Key: pkg.PURLQualifierVCSURL, Value: vcsURL},
	}
}

View File

@ -0,0 +1,46 @@
package python
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/pkg"
)
// Test_packageURL verifies the purl string produced by packageURL, both without metadata and with metadata that
// carries VCS origin information.
func Test_packageURL(t *testing.T) {
	type testCase struct {
		testName string
		name     string
		version  string
		metadata *pkg.PythonPackageMetadata
		want     string
	}

	cases := []testCase{
		{
			testName: "without metadata",
			name:     "name",
			version:  "v0.1.0",
			want:     "pkg:pypi/name@v0.1.0",
		},
		{
			testName: "with vcs info",
			name:     "name",
			version:  "v0.1.0",
			metadata: &pkg.PythonPackageMetadata{
				Name:    "bogus",  // note: ignored
				Version: "v0.2.0", // note: ignored
				DirectURLOrigin: &pkg.PythonDirectURLOriginInfo{
					VCS:      "git",
					URL:      "https://github.com/test/test.git",
					CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
				},
			},
			want: "pkg:pypi/name@v0.1.0?vcs_url=git+https://github.com/test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
		},
	}

	for _, tc := range cases {
		t.Run(tc.testName, func(t *testing.T) {
			got := packageURL(tc.name, tc.version, tc.metadata)
			assert.Equal(t, tc.want, got)
		})
	}
}

View File

@ -4,15 +4,15 @@ import (
"encoding/json"
"fmt"
"io"
"sort"
"strings"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
type PipfileLock struct {
type pipfileLock struct {
Meta struct {
Hash struct {
Sha256 string `json:"sha256"`
@ -35,16 +35,15 @@ type Dependency struct {
Version string `json:"version"`
}
// integrity check
var _ common.ParserFn = parsePipfileLock
var _ generic.Parser = parsePipfileLock
// parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered.
func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]*pkg.Package, 0)
func parsePipfileLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pkgs := make([]pkg.Package, 0)
dec := json.NewDecoder(reader)
for {
var lock PipfileLock
var lock pipfileLock
if err := dec.Decode(&lock); err == io.EOF {
break
} else if err != nil {
@ -52,19 +51,11 @@ func parsePipfileLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Re
}
for name, pkgMeta := range lock.Default {
version := strings.TrimPrefix(pkgMeta.Version, "==")
packages = append(packages, &pkg.Package{
Name: name,
Version: version,
Language: pkg.Python,
Type: pkg.PythonPkg,
})
pkgs = append(pkgs, newPackageForIndex(name, version, reader.Location))
}
}
// Without sorting the packages slice, the order of packages will be unstable, due to ranging over a map.
sort.Slice(packages, func(i, j int) bool {
return packages[i].String() < packages[j].String()
})
pkg.Sort(pkgs)
return packages, nil, nil
return pkgs, nil, nil
}

View File

@ -1,62 +1,55 @@
package python
import (
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)
func TestParsePipFileLock(t *testing.T) {
expected := []*pkg.Package{
fixture := "test-fixtures/pipfile-lock/Pipfile.lock"
locations := source.NewLocationSet(source.NewLocation(fixture))
expectedPkgs := []pkg.Package{
{
Name: "aio-pika",
Version: "6.8.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "aio-pika",
Version: "6.8.0",
PURL: "pkg:pypi/aio-pika@6.8.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "aiodns",
Version: "2.0.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "aiodns",
Version: "2.0.0",
PURL: "pkg:pypi/aiodns@2.0.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "aiohttp",
Version: "3.7.4.post0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "aiohttp",
Version: "3.7.4.post0",
PURL: "pkg:pypi/aiohttp@3.7.4.post0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "aiohttp-jinja2",
Version: "1.4.2",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "aiohttp-jinja2",
Version: "1.4.2",
PURL: "pkg:pypi/aiohttp-jinja2@1.4.2",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
}
fixture, err := os.Open("test-fixtures/pipfile-lock/Pipfile.lock")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
// TODO: relationships are not under test
var expectedRelationships []artifact.Relationship
// TODO: no relationships are under test yet
actual, _, err := parsePipfileLock(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse requirements: %+v", err)
}
if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
}
pkgtest.TestFileParser(t, fixture, parsePipfileLock, expectedPkgs, expectedRelationships)
}

View File

@ -2,30 +2,45 @@ package python
import (
"fmt"
"io"
"github.com/pelletier/go-toml"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
// integrity check
var _ common.ParserFn = parsePoetryLock
var _ generic.Parser = parsePoetryLock
// poetryMetadata is the shape that poetry.lock contents are unmarshalled into; only the "package" TOML entries
// are captured.
type poetryMetadata struct {
// Packages holds one element per "package" entry. parsePoetryLock reads only Name and Version from each element.
Packages []struct {
Name string `toml:"name"`
Version string `toml:"version"`
Category string `toml:"category"`
Description string `toml:"description"`
Optional bool `toml:"optional"`
} `toml:"package"`
}
// parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered.
func parsePoetryLock(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
func parsePoetryLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
tree, err := toml.LoadReader(reader)
if err != nil {
return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %v", err)
return nil, nil, fmt.Errorf("unable to load poetry.lock for parsing: %w", err)
}
metadata := PoetryMetadata{}
metadata := poetryMetadata{}
err = tree.Unmarshal(&metadata)
if err != nil {
return nil, nil, fmt.Errorf("unable to parse poetry.lock: %v", err)
return nil, nil, fmt.Errorf("unable to parse poetry.lock: %w", err)
}
return metadata.Pkgs(), nil, nil
var pkgs []pkg.Package
for _, p := range metadata.Packages {
pkgs = append(pkgs, newPackageForIndex(p.Name, p.Version, reader.Location))
}
return pkgs, nil, nil
}

View File

@ -1,59 +1,54 @@
package python
import (
"os"
"testing"
"github.com/go-test/deep"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)
func TestParsePoetryLock(t *testing.T) {
expected := []*pkg.Package{
fixture := "test-fixtures/poetry/poetry.lock"
locations := source.NewLocationSet(source.NewLocation(fixture))
expectedPkgs := []pkg.Package{
{
Name: "added-value",
Version: "0.14.2",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: nil,
Name: "added-value",
Version: "0.14.2",
PURL: "pkg:pypi/added-value@0.14.2",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "alabaster",
Version: "0.7.12",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: nil,
Name: "alabaster",
Version: "0.7.12",
PURL: "pkg:pypi/alabaster@0.7.12",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "appnope",
Version: "0.1.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: nil,
Name: "appnope",
Version: "0.1.0",
PURL: "pkg:pypi/appnope@0.1.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "asciitree",
Version: "0.3.3",
Language: pkg.Python,
Type: pkg.PythonPkg,
Licenses: nil,
Name: "asciitree",
Version: "0.3.3",
PURL: "pkg:pypi/asciitree@0.3.3",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
}
fixture, err := os.Open("test-fixtures/poetry/poetry.lock")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
// TODO: relationships are not under test
var expectedRelationships []artifact.Relationship
// TODO: no relationships are under test yet
actual, _, err := parsePoetryLock(fixture.Name(), fixture)
if err != nil {
t.Error(err)
}
differences := deep.Equal(expected, actual)
if differences != nil {
t.Errorf("returned package list differed from expectation: %+v", differences)
}
pkgtest.TestFileParser(t, fixture, parsePoetryLock, expectedPkgs, expectedRelationships)
}

View File

@ -3,21 +3,21 @@ package python
import (
"bufio"
"fmt"
"io"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
// integrity check
var _ common.ParserFn = parseRequirementsTxt
var _ generic.Parser = parseRequirementsTxt
// parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
// specific version.
func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]*pkg.Package, 0)
func parseRequirementsTxt(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
@ -42,14 +42,14 @@ func parseRequirementsTxt(_ string, reader io.Reader) ([]*pkg.Package, []artifac
// parse a new requirement
parts := strings.Split(line, "==")
if len(parts) < 2 {
// this should never happen, but just in case
log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line)
continue
}
name := strings.TrimSpace(parts[0])
version := strings.TrimSpace(parts[1])
packages = append(packages, &pkg.Package{
Name: name,
Version: version,
Language: pkg.Python,
Type: pkg.PythonPkg,
})
packages = append(packages, newPackageForIndex(name, version, reader.Location))
}
if err := scanner.Err(); err != nil {

View File

@ -1,56 +1,45 @@
package python
import (
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)
func TestParseRequirementsTxt(t *testing.T) {
expected := []*pkg.Package{
fixture := "test-fixtures/requires/requirements.txt"
locations := source.NewLocationSet(source.NewLocation(fixture))
expectedPkgs := []pkg.Package{
{
Name: "flask",
Version: "4.0.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "flask",
Version: "4.0.0",
PURL: "pkg:pypi/flask@4.0.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "foo",
Version: "1.0.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "foo",
Version: "1.0.0",
PURL: "pkg:pypi/foo@1.0.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "SomeProject",
Version: "5.4",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "SomeProject",
Version: "5.4",
PURL: "pkg:pypi/SomeProject@5.4",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
}
fixture, err := os.Open("test-fixtures/requires/requirements.txt")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
var expectedRelationships []artifact.Relationship
// TODO: no relationships are under test yet
actual, _, err := parseRequirementsTxt(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse requirements: %+v", err)
}
if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
}
pkgtest.TestFileParser(t, fixture, parseRequirementsTxt, expectedPkgs, expectedRelationships)
}

View File

@ -2,27 +2,28 @@ package python
import (
"bufio"
"io"
"regexp"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
// integrity check
var _ common.ParserFn = parseSetup
var _ generic.Parser = parseSetup
// match examples:
//
// 'pathlib3==2.2.0;python_version<"3.6"' --> match(name=pathlib3 version=2.2.0)
// "mypy==v0.770", --> match(name=mypy version=v0.770)
// " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770)
var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w\.]*)`)
var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`)
func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
packages := make([]*pkg.Package, 0)
func parseSetup(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package
scanner := bufio.NewScanner(reader)
@ -37,14 +38,17 @@ func parseSetup(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relation
}
name := strings.Trim(parts[0], "'\"")
name = strings.TrimSpace(name)
name = strings.Trim(name, "'\"")
version := strings.TrimSpace(parts[len(parts)-1])
packages = append(packages, &pkg.Package{
Name: strings.Trim(name, "'\""),
Version: strings.Trim(version, "'\""),
Language: pkg.Python,
Type: pkg.PythonPkg,
})
version = strings.Trim(version, "'\"")
if name == "" || version == "" {
log.WithFields("path", reader.RealPath).Warnf("unable to parse package in setup.py line: %q", line)
continue
}
packages = append(packages, newPackageForIndex(name, version, reader.Location))
}
}

View File

@ -1,68 +1,61 @@
package python
import (
"os"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"github.com/anchore/syft/syft/source"
)
func TestParseSetup(t *testing.T) {
expected := []*pkg.Package{
fixture := "test-fixtures/setup/setup.py"
locations := source.NewLocationSet(source.NewLocation(fixture))
expectedPkgs := []pkg.Package{
{
Name: "pathlib3",
Version: "2.2.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "pathlib3",
Version: "2.2.0",
PURL: "pkg:pypi/pathlib3@2.2.0",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "mypy",
Version: "v0.770",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "mypy",
Version: "v0.770",
PURL: "pkg:pypi/mypy@v0.770",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "mypy1",
Version: "v0.770",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "mypy1",
Version: "v0.770",
PURL: "pkg:pypi/mypy1@v0.770",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "mypy2",
Version: "v0.770",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "mypy2",
Version: "v0.770",
PURL: "pkg:pypi/mypy2@v0.770",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "mypy3",
Version: "v0.770",
Language: pkg.Python,
Type: pkg.PythonPkg,
Name: "mypy3",
Version: "v0.770",
PURL: "pkg:pypi/mypy3@v0.770",
Locations: locations,
Language: pkg.Python,
Type: pkg.PythonPkg,
},
}
fixture, err := os.Open("test-fixtures/setup/setup.py")
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
var expectedRelationships []artifact.Relationship
// TODO: no relationships are under test yet
actual, _, err := parseSetup(fixture.Name(), fixture)
if err != nil {
t.Fatalf("failed to parse requirements: %+v", err)
}
if diff := cmp.Diff(expected, actual,
cmp.AllowUnexported(pkg.Package{}),
cmp.Comparer(
func(x, y source.LocationSet) bool {
return cmp.Equal(x.ToSlice(), y.ToSlice())
},
),
); diff != "" {
t.Errorf("unexpected result from parsing (-expected +actual)\n%s", diff)
}
pkgtest.TestFileParser(t, fixture, parseSetup, expectedPkgs, expectedRelationships)
}

View File

@ -11,89 +11,33 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)
const (
eggMetadataGlob = "**/*egg-info/PKG-INFO"
eggFileMetadataGlob = "**/*.egg-info"
wheelMetadataGlob = "**/*dist-info/METADATA"
)
type PackageCataloger struct{}
// NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.
func NewPythonPackageCataloger() *PackageCataloger {
return &PackageCataloger{}
}
// Name returns a string that uniquely describes a cataloger
func (c *PackageCataloger) Name() string {
return "python-package-cataloger"
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
func (c *PackageCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artifact.Relationship, error) {
var fileMatches []source.Location
for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob, eggFileMetadataGlob} {
matches, err := resolver.FilesByGlob(glob)
if err != nil {
return nil, nil, fmt.Errorf("failed to find files by glob: %s", glob)
}
fileMatches = append(fileMatches, matches...)
}
var pkgs []pkg.Package
for _, location := range fileMatches {
p, err := c.catalogEggOrWheel(resolver, location)
if err != nil {
return nil, nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.RealPath, err)
}
if pkg.IsValid(p) {
pkgs = append(pkgs, *p)
}
}
return pkgs, nil, nil
}
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
func (c *PackageCataloger) catalogEggOrWheel(resolver source.FileResolver, metadataLocation source.Location) (*pkg.Package, error) {
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataLocation)
// parseWheelOrEgg takes the primary metadata file reference and returns the python package it represents.
func parseWheelOrEgg(resolver source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
metadata, sources, err := assembleEggOrWheelMetadata(resolver, reader.Location)
if err != nil {
return nil, err
return nil, nil, err
}
if metadata == nil {
return nil, nil, nil
}
// This can happen for Python 2.7 where it is reported from an egg-info, but Python is
// the actual runtime, it isn't a "package". The special-casing here allows to skip it
if metadata.Name == "Python" {
return nil, nil
return nil, nil, nil
}
var licenses []string
if metadata.License != "" {
licenses = []string{metadata.License}
}
pkgs := []pkg.Package{newPackageForPackage(*metadata, sources...)}
p := &pkg.Package{
Name: metadata.Name,
Version: metadata.Version,
FoundBy: c.Name(),
Locations: source.NewLocationSet(sources...),
Licenses: licenses,
Language: pkg.Python,
Type: pkg.PythonPkg,
MetadataType: pkg.PythonPackageMetadataType,
Metadata: *metadata,
}
p.SetID()
return p, nil
return pkgs, nil, nil
}
// fetchInstalledFiles finds a corresponding installed-files.txt file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
func fetchInstalledFiles(resolver source.FileResolver, metadataLocation source.Location, sitePackagesRootPath string) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the installed-files.txt path to the same layer (or the next adjacent lower layer).
@ -124,7 +68,7 @@ func (c *PackageCataloger) fetchInstalledFiles(resolver source.FileResolver, met
}
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
func fetchRecordFiles(resolver source.FileResolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
@ -151,7 +95,7 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.FileResolver, metada
}
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
func (c *PackageCataloger) fetchTopLevelPackages(resolver source.FileResolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) {
func fetchTopLevelPackages(resolver source.FileResolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) {
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
parentDir := filepath.Dir(metadataLocation.RealPath)
topLevelPath := filepath.Join(parentDir, "top_level.txt")
@ -181,7 +125,7 @@ func (c *PackageCataloger) fetchTopLevelPackages(resolver source.FileResolver, m
return pkgs, sources, nil
}
func (c *PackageCataloger) fetchDirectURLData(resolver source.FileResolver, metadataLocation source.Location) (d *pkg.PythonDirectURLOriginInfo, sources []source.Location, err error) {
func fetchDirectURLData(resolver source.FileResolver, metadataLocation source.Location) (d *pkg.PythonDirectURLOriginInfo, sources []source.Location, err error) {
parentDir := filepath.Dir(metadataLocation.RealPath)
directURLPath := filepath.Join(parentDir, "direct_url.json")
directURLLocation := resolver.RelativeFileByPath(metadataLocation, directURLPath)
@ -216,7 +160,7 @@ func (c *PackageCataloger) fetchDirectURLData(resolver source.FileResolver, meta
}
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) {
func assembleEggOrWheelMetadata(resolver source.FileResolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) {
var sources = []source.Location{metadataLocation}
metadataContents, err := resolver.FileContentsByLocation(metadataLocation)
@ -230,13 +174,17 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv
return nil, nil, err
}
if metadata.Name == "" {
return nil, nil, nil
}
// attach any python files found for the given wheel/egg installation
r, s, err := c.fetchRecordFiles(resolver, metadataLocation)
r, s, err := fetchRecordFiles(resolver, metadataLocation)
if err != nil {
return nil, nil, err
}
if len(r) == 0 {
r, s, err = c.fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath)
r, s, err = fetchInstalledFiles(resolver, metadataLocation, metadata.SitePackagesRootPath)
if err != nil {
return nil, nil, err
}
@ -246,7 +194,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv
metadata.Files = r
// attach any top-level package names found for the given wheel/egg installation
p, s, err := c.fetchTopLevelPackages(resolver, metadataLocation)
p, s, err := fetchTopLevelPackages(resolver, metadataLocation)
if err != nil {
return nil, nil, err
}
@ -254,7 +202,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.FileResolv
metadata.TopLevelPackages = p
// attach any direct-url package data found for the given wheel/egg installation
d, s, err := c.fetchDirectURLData(resolver, metadataLocation)
d, s, err := fetchDirectURLData(resolver, metadataLocation)
if err != nil {
return nil, nil, err
}

View File

@ -1,18 +0,0 @@
package python
import "github.com/anchore/syft/syft/pkg"
// PoetryMetadata represents poetry.lock metadata; only the "package" TOML entries are captured.
type PoetryMetadata struct {
// Packages holds one element per "package" entry.
Packages []PoetryMetadataPackage `toml:"package"`
}
// Pkgs converts every package entry within the poetry.lock metadata into its `pkg.Package` representation, in the
// same order as the entries. The returned slice is never nil, even when there are no entries.
func (m PoetryMetadata) Pkgs() []*pkg.Package {
	result := make([]*pkg.Package, 0)

	for i := range m.Packages {
		result = append(result, m.Packages[i].Pkg())
	}

	return result
}

View File

@ -1,21 +0,0 @@
package python
import "github.com/anchore/syft/syft/pkg"
// PoetryMetadataPackage is a single package entry, populated from the TOML keys named in the struct tags.
// Only Name and Version are carried over into the package returned by Pkg.
type PoetryMetadataPackage struct {
Name string `toml:"name"`
Version string `toml:"version"`
Category string `toml:"category"`
Description string `toml:"description"`
Optional bool `toml:"optional"`
}
// Pkg builds the standard `pkg.Package` representation of this poetry.lock package entry: the entry's name and
// version, marked as a python-language, python-type package. A freshly allocated package is returned on each call.
func (p PoetryMetadataPackage) Pkg() *pkg.Package {
	result := new(pkg.Package)

	result.Name = p.Name
	result.Version = p.Version
	result.Language = pkg.Python
	result.Type = pkg.PythonPkg

	return result
}

View File

@ -1,19 +1,12 @@
package pkg
import (
"fmt"
"sort"
"github.com/scylladb/go-set/strset"
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)
var (
_ FileOwner = (*PythonPackageMetadata)(nil)
_ urlIdentifier = (*PythonPackageMetadata)(nil)
)
var _ FileOwner = (*PythonPackageMetadata)(nil)
// PythonFileDigest represents the file metadata for a single file attributed to a python package.
type PythonFileDigest struct {
@ -80,33 +73,3 @@ func (m PythonPackageMetadata) OwnedFiles() (result []string) {
sort.Strings(result)
return result
}
// PackageURL renders a pypi-type package URL string from the metadata's name and version, plus any qualifiers
// derived from the metadata. The linux release argument is ignored.
func (m PythonPackageMetadata) PackageURL(_ *linux.Release) string {
	// neither a namespace nor a subpath is set on the generated purl
	const (
		noNamespace = ""
		noSubpath   = ""
	)

	qualifiers := m.purlQualifiers()
	purl := packageurl.NewPackageURL(packageurl.TypePyPi, noNamespace, m.Name, m.Version, qualifiers, noSubpath)

	return purl.ToString()
}
// purlQualifiers collects the purl qualifiers derivable from the metadata. The result is always a non-nil
// (possibly empty) qualifier list; qualifiers are only added when direct URL origin information is present.
func (m PythonPackageMetadata) purlQualifiers() packageurl.Qualifiers {
	qualifiers := packageurl.Qualifiers{}

	origin := m.DirectURLOrigin
	if origin == nil {
		return qualifiers
	}

	return append(qualifiers, origin.vcsURLQualifier()...)
}
// vcsURLQualifier returns a single "vcs_url" purl qualifier built from the direct URL origin info, in the form
// "<vcs>+<url>[@<commit>]". It returns nil when no VCS is recorded. The "@<commit>" suffix is only appended when a
// commit ID is present, so that a missing commit does not leave a dangling "@" at the end of the qualifier value.
func (p PythonDirectURLOriginInfo) vcsURLQualifier() packageurl.Qualifiers {
	if p.VCS == "" {
		return nil
	}

	vcsURL := fmt.Sprintf("%s+%s", p.VCS, p.URL)
	if p.CommitID != "" {
		// the revision portion of a vcs_url is optional; only emit it when known
		vcsURL += "@" + p.CommitID
	}

	// Taken from https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs
	// packageurl-go still doesn't support all qualifier names
	return packageurl.Qualifiers{{Key: PURLQualifierVCSURL, Value: vcsURL}}
}

View File

@ -5,57 +5,8 @@ import (
"testing"
"github.com/go-test/deep"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/anchore/syft/syft/linux"
)
// TestPythonPackageMetadata_pURL verifies the purl string produced by PythonPackageMetadata.PackageURL, including
// that VCS origin info becomes a vcs_url qualifier and that linux release info has no effect on the result.
func TestPythonPackageMetadata_pURL(t *testing.T) {
	type testCase struct {
		name     string
		distro   *linux.Release
		metadata PythonPackageMetadata
		expected string
	}

	cases := []testCase{
		{
			name: "with vcs info",
			metadata: PythonPackageMetadata{
				Name:    "name",
				Version: "v0.1.0",
				DirectURLOrigin: &PythonDirectURLOriginInfo{
					VCS:      "git",
					URL:      "https://github.com/test/test.git",
					CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
				},
			},
			expected: "pkg:pypi/name@v0.1.0?vcs_url=git+https://github.com/test/test.git%40aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
		},
		{
			name: "should not respond to release info",
			distro: &linux.Release{
				ID:        "rhel",
				VersionID: "8.4",
			},
			metadata: PythonPackageMetadata{
				Name:    "name",
				Version: "v0.1.0",
			},
			expected: "pkg:pypi/name@v0.1.0",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.metadata.PackageURL(tc.distro)
			if got == tc.expected {
				return
			}

			// render a readable diff between the expected and actual purl strings
			differ := diffmatchpatch.New()
			delta := differ.DiffMain(tc.expected, got, true)
			t.Errorf("diff: %s", differ.DiffPrettyText(delta))
		})
	}
}
func TestPythonMetadata_FileOwner(t *testing.T) {
tests := []struct {
metadata PythonPackageMetadata

View File

@ -17,19 +17,6 @@ func TestPackageURL(t *testing.T) {
distro *linux.Release
expected string
}{
{
name: "python",
pkg: Package{
Name: "bad-name",
Version: "bad-v0.1.0",
Type: PythonPkg,
Metadata: PythonPackageMetadata{
Name: "name",
Version: "v0.1.0",
},
},
expected: "pkg:pypi/name@v0.1.0",
},
{
name: "gem",
pkg: Package{
@ -140,6 +127,7 @@ func TestPackageURL(t *testing.T) {
expectedTypes.Remove(string(HackagePkg))
expectedTypes.Remove(string(BinaryPkg))
expectedTypes.Remove(string(PhpComposerPkg))
expectedTypes.Remove(string(PythonPkg))
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {