Add python wheel egg relationships (#2903)

* add python package relationships

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* nil for empty relationships collections

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* new json schema for optional python requirements

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update format snapshots for python packages

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* decompose python parsers more + add tests around plural fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update JSON schema with python dep refs

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Alex Goodman 2024-05-25 16:11:38 -04:00 committed by GitHub
parent 64e11d53e9
commit 05e8ba948d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
42 changed files with 3706 additions and 321 deletions


@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.10"
JSONSchemaVersion = "16.0.11"
)


@ -4,6 +4,7 @@ import (
"path"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -20,22 +21,22 @@ func NewDependencyRelationships(resolver file.Resolver, accessor sbomsync.Access
// 3. craft package-to-package relationships for each binary that represent shared library dependencies
//note: we only care about package-to-package relationships
var relIndex *relationshipIndex
var relIndex *relationship.Index
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
relIndex = newRelationshipIndex(s.Relationships...)
relIndex = relationship.NewIndex(s.Relationships...)
})
return generateRelationships(resolver, accessor, index, relIndex)
}
func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationshipIndex) []artifact.Relationship {
func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, index *sharedLibraryIndex, relIndex *relationship.Index) []artifact.Relationship {
// read all existing dependencyOf relationships
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if r.Type != artifact.DependencyOfRelationship {
continue
}
relIndex.track(r)
relIndex.Track(r)
}
})
@ -58,7 +59,7 @@ func generateRelationships(resolver file.Resolver, accessor sbomsync.Accessor, i
}
})
return relIndex.newRelationships()
return relIndex.NewRelationships()
}
// PackagesToRemove returns a list of binary packages (resolved by the ELF cataloger) that should be removed from the SBOM
@ -146,7 +147,7 @@ func getBinaryPackagesToDelete(resolver file.Resolver, s *sbom.SBOM) []artifact.
return pkgsToDelete
}
func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationshipIndex, index *sharedLibraryIndex) {
func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver file.Resolver, relIndex *relationship.Index, index *sharedLibraryIndex) {
for _, libReference := range exec.ImportedLibraries {
// for each library reference, check s.Artifacts.Packages.Sorted(pkg.BinaryPkg) for a binary package that represents that library
// if found, create a relationship between the parent package and the library package
@ -166,7 +167,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
realBaseName := path.Base(loc.RealPath)
pkgCollection := index.owningLibraryPackage(realBaseName)
if pkgCollection.PackageCount() < 1 {
relIndex.add(
relIndex.Add(
artifact.Relationship{
From: loc.Coordinates,
To: parentPkg,
@ -175,7 +176,7 @@ func populateRelationships(exec file.Executable, parentPkg pkg.Package, resolver
)
}
for _, p := range pkgCollection.Sorted() {
relIndex.add(
relIndex.Add(
artifact.Relationship{
From: p,
To: parentPkg,


@ -225,7 +225,6 @@ func TestNewDependencyRelationships(t *testing.T) {
resolver: nil,
coordinateIndex: map[file.Coordinates]file.Executable{},
packages: []pkg.Package{},
want: make([]artifact.Relationship, 0),
},
{
name: "given a package that imports glibc, expect a relationship between the two packages when the package is an executable",
@ -297,7 +296,6 @@ func TestNewDependencyRelationships(t *testing.T) {
Type: artifact.DependencyOfRelationship,
},
},
want: []artifact.Relationship{},
},
{
name: "given a package that imports a library that is not tracked by the resolver, expect no relationships to be created",
@ -308,7 +306,6 @@ func TestNewDependencyRelationships(t *testing.T) {
parallelLibCoordinate: syftTestFixtureExecutable2,
},
packages: []pkg.Package{glibCPackage, syftTestFixturePackage},
want: []artifact.Relationship{},
},
}
for _, tt := range tests {


@ -1,59 +0,0 @@
package binary
import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
type relationshipIndex struct {
typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
additional []artifact.Relationship
}
func newRelationshipIndex(existing ...artifact.Relationship) *relationshipIndex {
r := &relationshipIndex{
typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
additional: make([]artifact.Relationship, 0),
}
for _, rel := range existing {
r.track(rel)
}
return r
}
// track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added).
// returns true if the relationship is new to the index, false otherwise.
func (i *relationshipIndex) track(r artifact.Relationship) bool {
fromID := r.From.ID()
if _, ok := i.typesByFromTo[fromID]; !ok {
i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set)
}
toID := r.To.ID()
if _, ok := i.typesByFromTo[fromID][toID]; !ok {
i.typesByFromTo[fromID][toID] = strset.New()
}
var exists bool
if i.typesByFromTo[fromID][toID].Has(string(r.Type)) {
exists = true
}
i.typesByFromTo[fromID][toID].Add(string(r.Type))
return !exists
}
// add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate).
// nolint:unparam
func (i *relationshipIndex) add(r artifact.Relationship) bool {
if i.track(r) {
i.additional = append(i.additional, r)
return true
}
return false
}
func (i *relationshipIndex) newRelationships() []artifact.Relationship {
return i.additional
}


@ -1,128 +0,0 @@
package binary
import (
"reflect"
"testing"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
func Test_newRelationshipIndex(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
tests := []struct {
name string
given []artifact.Relationship
want *relationshipIndex
}{
{
name: "newRelationshipIndex returns an empty index with no existing relationships",
want: &relationshipIndex{
typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
additional: make([]artifact.Relationship, 0),
},
},
{
name: "newRelationshipIndex returns an index which tracks existing relationships",
given: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
want: &relationshipIndex{
typesByFromTo: map[artifact.ID]map[artifact.ID]*strset.Set{
"from": {
"to": strset.New("evident-by"),
},
},
additional: make([]artifact.Relationship, 0),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := newRelationshipIndex(tt.given...); !reflect.DeepEqual(got, tt.want) {
t.Errorf("newRelationshipIndex() = %v, want %v", got, tt.want)
}
})
}
}
func Test_relationshipIndex_track(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "track returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "track returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := newRelationshipIndex(tt.existing...)
if got := i.track(tt.given); got != tt.want {
t.Errorf("track() = %v, want %v", got, tt.want)
}
})
}
}
func Test_relationshipIndex_add(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "add returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "add returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := newRelationshipIndex(tt.existing...)
if got := i.add(tt.given); got != tt.want {
t.Errorf("add() = %v, want %v", got, tt.want)
}
})
}
}
type fakeIdentifiable struct {
id string
}
func (f fakeIdentifiable) ID() artifact.ID {
return artifact.ID(f.id)
}


@ -32,7 +32,7 @@ type ownershipByFilesMetadata struct {
Files []string `json:"files"`
}
func byFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
func ByFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
var relationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {


@ -5,7 +5,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)
func evidentBy(catalog *pkg.Collection) []artifact.Relationship {
func EvidentBy(catalog *pkg.Collection) []artifact.Relationship {
var edges []artifact.Relationship
for _, p := range catalog.Sorted() {
for _, l := range p.Locations.ToSlice() {


@ -76,7 +76,7 @@ func TestRelationshipsEvidentBy(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := evidentBy(tt.catalog)
actual := EvidentBy(tt.catalog)
require.Len(t, actual, len(tt.want))
for i := range actual {
assert.Equal(t, tt.want[i].From.ID(), actual[i].From.ID(), "from mismatch at index %d", i)


@ -23,7 +23,7 @@ var (
}
)
func excludeBinariesByFileOwnershipOverlap(accessor sbomsync.Accessor) {
func ExcludeBinariesByFileOwnershipOverlap(accessor sbomsync.Accessor) {
accessor.WriteToSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if excludeBinaryByFileOwnershipOverlap(r, s.Artifacts.Packages) {


@ -1,50 +0,0 @@
package relationship
import (
"github.com/anchore/syft/internal/relationship/binary"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
)
func Finalize(resolver file.Resolver, builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
accessor := builder.(sbomsync.Accessor)
// remove ELF packages and Binary packages that are already
// represented by a source package (e.g. a package that is evident by some package manager)
builder.DeletePackages(binary.PackagesToRemove(resolver, accessor)...)
// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
if cfg.PackageFileOwnershipOverlap {
byFileOwnershipOverlapWorker(accessor)
}
// conditionally remove binary packages based on file ownership overlap relationships found
// https://github.com/anchore/syft/issues/931
if cfg.ExcludeBinaryPackagesWithFileOwnershipOverlap {
excludeBinariesByFileOwnershipOverlap(accessor)
}
// add the new relationships for executables to the SBOM
newBinaryRelationships := binary.NewDependencyRelationships(resolver, accessor)
accessor.WriteToSBOM(func(s *sbom.SBOM) {
s.Relationships = append(s.Relationships, newBinaryRelationships...)
})
builder.AddRelationships(newBinaryRelationships...)
// add source "contains package" relationship (source-to-package)
var sourceRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
sourceRelationships = toSource(src, s.Artifacts.Packages)
})
builder.AddRelationships(sourceRelationships...)
// add evident-by relationships (package-to-file)
var evidentByRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
evidentByRelationships = evidentBy(s.Artifacts.Packages)
})
builder.AddRelationships(evidentByRelationships...)
}


@ -0,0 +1,88 @@
package relationship
import (
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/syft/artifact"
)
type Index struct {
typesByFromTo map[artifact.ID]map[artifact.ID]*strset.Set
existing []artifact.Relationship
additional []artifact.Relationship
}
func NewIndex(existing ...artifact.Relationship) *Index {
r := &Index{
typesByFromTo: make(map[artifact.ID]map[artifact.ID]*strset.Set),
}
r.TrackAll(existing...)
return r
}
func (i *Index) track(r artifact.Relationship) bool {
fromID := r.From.ID()
if _, ok := i.typesByFromTo[fromID]; !ok {
i.typesByFromTo[fromID] = make(map[artifact.ID]*strset.Set)
}
toID := r.To.ID()
if _, ok := i.typesByFromTo[fromID][toID]; !ok {
i.typesByFromTo[fromID][toID] = strset.New()
}
var exists bool
if i.typesByFromTo[fromID][toID].Has(string(r.Type)) {
exists = true
}
i.typesByFromTo[fromID][toID].Add(string(r.Type))
return !exists
}
// Track this relationship as "exists" in the index (this is used to prevent duplicate relationships from being added).
// returns true if the relationship is new to the index, false otherwise.
func (i *Index) Track(r artifact.Relationship) bool {
unique := i.track(r)
if unique {
i.existing = append(i.existing, r)
}
return unique
}
// Add a new relationship to the index, returning true if the relationship is new to the index, false otherwise (thus is a duplicate).
func (i *Index) Add(r artifact.Relationship) bool {
if i.track(r) {
i.additional = append(i.additional, r)
return true
}
return false
}
func (i *Index) TrackAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Track(r)
}
}
func (i *Index) AddAll(rs ...artifact.Relationship) {
for _, r := range rs {
i.Add(r)
}
}
func (i *Index) NewRelationships() []artifact.Relationship {
return i.additional
}
func (i *Index) ExistingRelationships() []artifact.Relationship {
return i.existing
}
func (i *Index) AllUniqueRelationships() []artifact.Relationship {
var all []artifact.Relationship
all = append(all, i.existing...)
all = append(all, i.additional...)
return all
}
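
Reviewer note: the contract of the new Index is that Track records relationships that already exist in the SBOM, Add records relationships that are new, and both deduplicate on the (from, to, type) triple. A minimal usage sketch of that behavior follows; the main package and the id helper are hypothetical, and it assumes the code lives inside the syft module, since internal/relationship cannot be imported from outside it.

package main

import (
	"fmt"

	"github.com/anchore/syft/internal/relationship"
	"github.com/anchore/syft/syft/artifact"
)

// id is a stand-in for anything that satisfies artifact.Identifiable.
type id string

func (i id) ID() artifact.ID { return artifact.ID(i) }

func main() {
	// seed the index with a relationship that already exists in the SBOM
	existing := artifact.Relationship{From: id("pkg-a"), To: id("file-1"), Type: artifact.EvidentByRelationship}
	idx := relationship.NewIndex(existing)

	// Add refuses duplicates of anything already tracked...
	fmt.Println(idx.Add(existing)) // false

	// ...but records genuinely new edges, which are reported separately from the seeded ones
	fmt.Println(idx.Add(artifact.Relationship{From: id("pkg-b"), To: id("pkg-a"), Type: artifact.DependencyOfRelationship})) // true

	fmt.Println(len(idx.ExistingRelationships()))  // 1 (seeded)
	fmt.Println(len(idx.NewRelationships()))       // 1 (added)
	fmt.Println(len(idx.AllUniqueRelationships())) // 2
}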


@ -0,0 +1,225 @@
package relationship
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/anchore/syft/syft/artifact"
)
func Test_newRelationshipIndex(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
tests := []struct {
name string
given []artifact.Relationship
track []artifact.Relationship
add []artifact.Relationship
wantExisting []string
wantAdditional []string
}{
{
name: "empty",
},
{
name: "tracks existing relationships",
given: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
wantExisting: []string{"from [evident-by] to"},
},
{
name: "deduplicate tracked relationships",
given: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
track: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
wantExisting: []string{"from [evident-by] to"},
},
{
name: "deduplicate any input relationships",
given: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
track: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
add: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
wantExisting: []string{"from [evident-by] to"},
},
{
name: "deduplicate any added relationships",
add: []artifact.Relationship{
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
{
From: from,
To: to,
Type: artifact.EvidentByRelationship,
},
},
wantAdditional: []string{"from [evident-by] to"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
idx := NewIndex(tt.given...)
idx.TrackAll(tt.track...)
idx.AddAll(tt.add...)
diffRelationships(t, tt.wantExisting, idx.existing)
diffRelationships(t, tt.wantAdditional, idx.additional)
})
}
}
func diffRelationships(t *testing.T, expected []string, actual []artifact.Relationship) {
if d := cmp.Diff(expected, stringRelationships(actual)); d != "" {
t.Errorf("unexpected relationships (-want, +got): %s", d)
}
}
func stringRelationships(relationships []artifact.Relationship) []string {
var result []string
for _, r := range relationships {
result = append(result, string(r.From.ID())+" ["+string(r.Type)+"] "+string(r.To.ID()))
}
return result
}
func Test_relationshipIndex_track(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "track returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "track returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := NewIndex(tt.existing...)
if got := i.Track(tt.given); got != tt.want {
t.Errorf("track() = %v, want %v", got, tt.want)
}
})
}
}
func Test_relationshipIndex_add(t *testing.T) {
from := fakeIdentifiable{id: "from"}
to := fakeIdentifiable{id: "to"}
relationship := artifact.Relationship{From: from, To: to, Type: artifact.EvidentByRelationship}
tests := []struct {
name string
existing []artifact.Relationship
given artifact.Relationship
want bool
}{
{
name: "add returns true for a new relationship",
existing: []artifact.Relationship{},
given: relationship,
want: true,
},
{
name: "add returns false for an existing relationship",
existing: []artifact.Relationship{relationship},
given: relationship,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
i := NewIndex(tt.existing...)
if got := i.Add(tt.given); got != tt.want {
t.Errorf("add() = %v, want %v", got, tt.want)
}
})
}
}
type fakeIdentifiable struct {
id string
}
func (f fakeIdentifiable) ID() artifact.ID {
return artifact.ID(f.id)
}


@ -5,7 +5,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)
func toSource(src artifact.Identifiable, c *pkg.Collection) []artifact.Relationship {
func ToSource(src artifact.Identifiable, c *pkg.Collection) []artifact.Relationship {
relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
for p := range c.Enumerate() {
relationships = append(relationships, artifact.Relationship{


@ -4,10 +4,12 @@ import (
"context"
"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/internal/relationship/binary"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
)
@ -23,7 +25,7 @@ func (s sourceIdentifierAdapter) ID() artifact.ID {
func NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Description) Task {
fn := func(_ context.Context, resolver file.Resolver, builder sbomsync.Builder) error {
relationship.Finalize(
finalizeRelationships(
resolver,
builder,
cfg,
@ -34,3 +36,43 @@ func NewRelationshipsTask(cfg cataloging.RelationshipsConfig, src source.Descrip
return NewTask("relationships-cataloger", fn)
}
func finalizeRelationships(resolver file.Resolver, builder sbomsync.Builder, cfg cataloging.RelationshipsConfig, src artifact.Identifiable) {
accessor := builder.(sbomsync.Accessor)
// remove ELF packages and Binary packages that are already
// represented by a source package (e.g. a package that is evident by some package manager)
builder.DeletePackages(binary.PackagesToRemove(resolver, accessor)...)
// add relationships showing packages that are evident by a file which is owned by another package (package-to-package)
if cfg.PackageFileOwnershipOverlap {
relationship.ByFileOwnershipOverlapWorker(accessor)
}
// conditionally remove binary packages based on file ownership overlap relationships found
// https://github.com/anchore/syft/issues/931
if cfg.ExcludeBinaryPackagesWithFileOwnershipOverlap {
relationship.ExcludeBinariesByFileOwnershipOverlap(accessor)
}
// add the new relationships for executables to the SBOM
newBinaryRelationships := binary.NewDependencyRelationships(resolver, accessor)
accessor.WriteToSBOM(func(s *sbom.SBOM) {
s.Relationships = append(s.Relationships, newBinaryRelationships...)
})
builder.AddRelationships(newBinaryRelationships...)
// add source "contains package" relationship (source-to-package)
var sourceRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
sourceRelationships = relationship.ToSource(src, s.Artifacts.Packages)
})
builder.AddRelationships(sourceRelationships...)
// add evident-by relationships (package-to-file)
var evidentByRelationships []artifact.Relationship
accessor.ReadFromSBOM(func(s *sbom.SBOM) {
evidentByRelationships = relationship.EvidentBy(s.Artifacts.Packages)
})
builder.AddRelationships(evidentByRelationships...)
}

File diff suppressed because it is too large


@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.10/document",
"$id": "anchore.io/schema/syft/json/16.0.11/document",
"$ref": "#/$defs/Document",
"$defs": {
"AlpmDbEntry": {
@ -1973,6 +1973,21 @@
},
"directUrlOrigin": {
"$ref": "#/$defs/PythonDirectURLOriginInfo"
},
"requiresPython": {
"type": "string"
},
"requiresDist": {
"items": {
"type": "string"
},
"type": "array"
},
"providesExtra": {
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object",


@ -15,7 +15,7 @@
"packages": [
{
"name": "package-1",
"SPDXID": "SPDXRef-Package-python-package-1-fb6bef15e281ea43",
"SPDXID": "SPDXRef-Package-python-package-1-5a2b1ae000fcb51e",
"versionInfo": "1.0.1",
"supplier": "NOASSERTION",
"downloadLocation": "NOASSERTION",
@ -75,7 +75,7 @@
"relationships": [
{
"spdxElementId": "SPDXRef-DocumentRoot-Directory-some-path",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-fb6bef15e281ea43",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-5a2b1ae000fcb51e",
"relationshipType": "CONTAINS"
},
{


@ -15,7 +15,7 @@
"packages": [
{
"name": "package-1",
"SPDXID": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"SPDXID": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"versionInfo": "1.0.1",
"supplier": "NOASSERTION",
"downloadLocation": "NOASSERTION",
@ -89,7 +89,7 @@
"relationships": [
{
"spdxElementId": "SPDXRef-DocumentRoot-Image-user-image-input",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relationshipType": "CONTAINS"
},
{


@ -15,7 +15,7 @@
"packages": [
{
"name": "package-1",
"SPDXID": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"SPDXID": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"versionInfo": "1.0.1",
"supplier": "NOASSERTION",
"downloadLocation": "NOASSERTION",
@ -198,38 +198,38 @@
],
"relationships": [
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-f1-5265a4dde3edbf7c",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-z1-f5-839d99ee67d9d174",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-a1-f6-9c2f7510199b17f6",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-d2-f4-c641caa71518099f",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-d1-f3-c6f5b29dca12661f",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"spdxElementId": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relatedSpdxElement": "SPDXRef-File-f2-f9e49132a4b96ccd",
"relationshipType": "CONTAINS"
},
{
"spdxElementId": "SPDXRef-DocumentRoot-Image-user-image-input",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-80210ebcba92e632",
"relatedSpdxElement": "SPDXRef-Package-python-package-1-c5cf7ac34cbca450",
"relationshipType": "CONTAINS"
},
{


@ -84,7 +84,7 @@ ExternalRef: PACKAGE-MANAGER purl pkg:deb/debian/package-2@2.0.1
##### Package: package-1
PackageName: package-1
SPDXID: SPDXRef-Package-python-package-1-80210ebcba92e632
SPDXID: SPDXRef-Package-python-package-1-c5cf7ac34cbca450
PackageVersion: 1.0.1
PackageSupplier: NOASSERTION
PackageDownloadLocation: NOASSERTION
@ -98,13 +98,13 @@ ExternalRef: PACKAGE-MANAGER purl a-purl-1
##### Relationships
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-f1-5265a4dde3edbf7c
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-z1-f5-839d99ee67d9d174
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-a1-f6-9c2f7510199b17f6
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-d2-f4-c641caa71518099f
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-d1-f3-c6f5b29dca12661f
Relationship: SPDXRef-Package-python-package-1-80210ebcba92e632 CONTAINS SPDXRef-File-f2-f9e49132a4b96ccd
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-python-package-1-80210ebcba92e632
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-f1-5265a4dde3edbf7c
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-z1-f5-839d99ee67d9d174
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-a1-f6-9c2f7510199b17f6
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-d2-f4-c641caa71518099f
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-d1-f3-c6f5b29dca12661f
Relationship: SPDXRef-Package-python-package-1-c5cf7ac34cbca450 CONTAINS SPDXRef-File-f2-f9e49132a4b96ccd
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-python-package-1-c5cf7ac34cbca450
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-deb-package-2-4b756c6f6fb127a3
Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-DocumentRoot-Image-user-image-input


@ -37,7 +37,7 @@ ExternalRef: PACKAGE-MANAGER purl pkg:deb/debian/package-2@2.0.1
##### Package: package-1
PackageName: package-1
SPDXID: SPDXRef-Package-python-package-1-fb6bef15e281ea43
SPDXID: SPDXRef-Package-python-package-1-5a2b1ae000fcb51e
PackageVersion: 1.0.1
PackageSupplier: NOASSERTION
PackageDownloadLocation: NOASSERTION
@ -51,7 +51,7 @@ ExternalRef: PACKAGE-MANAGER purl a-purl-2
##### Relationships
Relationship: SPDXRef-DocumentRoot-Directory-some-path CONTAINS SPDXRef-Package-python-package-1-fb6bef15e281ea43
Relationship: SPDXRef-DocumentRoot-Directory-some-path CONTAINS SPDXRef-Package-python-package-1-5a2b1ae000fcb51e
Relationship: SPDXRef-DocumentRoot-Directory-some-path CONTAINS SPDXRef-Package-deb-package-2-39392bb5e270f669
Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-DocumentRoot-Directory-some-path


@ -40,7 +40,7 @@ ExternalRef: PACKAGE-MANAGER purl pkg:deb/debian/package-2@2.0.1
##### Package: package-1
PackageName: package-1
SPDXID: SPDXRef-Package-python-package-1-80210ebcba92e632
SPDXID: SPDXRef-Package-python-package-1-c5cf7ac34cbca450
PackageVersion: 1.0.1
PackageSupplier: NOASSERTION
PackageDownloadLocation: NOASSERTION
@ -54,7 +54,7 @@ ExternalRef: PACKAGE-MANAGER purl a-purl-1
##### Relationships
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-python-package-1-80210ebcba92e632
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-python-package-1-c5cf7ac34cbca450
Relationship: SPDXRef-DocumentRoot-Image-user-image-input CONTAINS SPDXRef-Package-deb-package-2-4b756c6f6fb127a3
Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-DocumentRoot-Image-user-image-input


@ -1,7 +1,7 @@
{
"artifacts": [
{
"id": "fb6bef15e281ea43",
"id": "5a2b1ae000fcb51e",
"name": "package-1",
"version": "1.0.1",
"type": "python",


@ -1,7 +1,7 @@
{
"artifacts": [
{
"id": "d748d4614750058d",
"id": "ad3ecac55fe1c30f",
"name": "package-1",
"version": "1.0.1",
"type": "python",


@ -1,7 +1,7 @@
{
"artifacts": [
{
"id": "80210ebcba92e632",
"id": "c5cf7ac34cbca450",
"name": "package-1",
"version": "1.0.1",
"type": "python",


@ -12,8 +12,12 @@ import (
"github.com/anchore/syft/syft/pkg"
)
// Processor is a function that can filter or augment existing packages and relationships based on existing material.
type Processor func([]pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error)
// ResolvingProcessor is a Processor with the additional behavior of being able to reference additional material from a file resolver.
type ResolvingProcessor func(context.Context, file.Resolver, []pkg.Package, []artifact.Relationship, error) ([]pkg.Package, []artifact.Relationship, error)
type requester func(resolver file.Resolver, env Environment) []request
type request struct {
@ -21,10 +25,30 @@ type request struct {
Parser
}
type processExecutor interface {
process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error)
}
type processorWrapper struct {
Processor
}
func (p processorWrapper) process(_ context.Context, _ file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
return p.Processor(pkgs, rels, err)
}
type resolvingProcessorWrapper struct {
ResolvingProcessor
}
func (p resolvingProcessorWrapper) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
return p.ResolvingProcessor(ctx, resolver, pkgs, rels, err)
}
// Cataloger implements the Catalog interface and is responsible for dispatching the proper parser function for
// a given path or glob pattern. This is intended to be reusable across many package cataloger types.
type Cataloger struct {
processors []Processor
processors []processExecutor
requesters []requester
upstreamCataloger string
}
@ -87,7 +111,16 @@ func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger
}
func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
c.processors = append(c.processors, processors...)
for _, p := range processors {
c.processors = append(c.processors, processorWrapper{Processor: p})
}
return c
}
func (c *Cataloger) WithResolvingProcessors(processors ...ResolvingProcessor) *Cataloger {
for _, p := range processors {
c.processors = append(c.processors, resolvingProcessorWrapper{ResolvingProcessor: p})
}
return c
}
@ -143,12 +176,12 @@ func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.
relationships = append(relationships, discoveredRelationships...)
}
return c.process(packages, relationships, nil)
return c.process(ctx, resolver, packages, relationships, nil)
}
func (c *Cataloger) process(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
for _, proc := range c.processors {
pkgs, rels, err = proc(pkgs, rels, err)
func (c *Cataloger) process(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
for _, p := range c.processors {
pkgs, rels, err = p.process(ctx, resolver, pkgs, rels, err)
}
return pkgs, rels, err
}
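
Reviewer note: a ResolvingProcessor is just a Processor that additionally receives the context and file resolver; both are adapted to the internal processExecutor interface by the wrappers above, so they run through the same process loop. A sketch of what a ResolvingProcessor looks like in practice follows; the package name and the dropVersionlessPackages example are assumptions, not part of this change.

package generic // assumption: the package holding the Cataloger shown above

import (
	"context"

	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
)

// dropVersionlessPackages is an illustrative ResolvingProcessor (not part of this
// change): it receives the context and resolver in addition to the catalog results,
// though this particular example only filters the packages it is given.
var dropVersionlessPackages ResolvingProcessor = func(
	_ context.Context,
	_ file.Resolver,
	pkgs []pkg.Package,
	rels []artifact.Relationship,
	err error,
) ([]pkg.Package, []artifact.Relationship, error) {
	if err != nil {
		// processors see any upstream error and may pass it through untouched
		return pkgs, rels, err
	}
	var kept []pkg.Package
	for _, p := range pkgs {
		if p.Version != "" {
			kept = append(kept, p)
		}
	}
	return kept, rels, nil
}

Registering it with WithResolvingProcessors(dropVersionlessPackages) wraps it in resolvingProcessorWrapper, so it runs alongside existing Processors without changing their behavior.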


@ -1,8 +1,9 @@
package lua
import (
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"testing"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
func Test_PackageCataloger_Globs(t *testing.T) {


@ -1,10 +1,11 @@
package lua
import (
"testing"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
"testing"
)
func TestParseRockspec(t *testing.T) {


@ -40,5 +40,5 @@ func NewInstalledPackageCataloger() pkg.Cataloger {
"**/*egg-info/PKG-INFO",
"**/*DIST-INFO/METADATA",
"**/*EGG-INFO/PKG-INFO",
)
).WithResolvingProcessors(wheelEggRelationships)
}


@ -2,10 +2,15 @@ package python
import (
"context"
"fmt"
"path"
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@ -65,6 +70,8 @@ func Test_PackageCataloger(t *testing.T) {
{Path: "requests/utils.py", Digest: &pkg.PythonFileDigest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
TopLevelPackages: []string{"requests"},
RequiresPython: ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
ProvidesExtra: []string{"security", "socks"},
},
},
},
@ -101,6 +108,8 @@ func Test_PackageCataloger(t *testing.T) {
{Path: "requests/utils.py", Digest: &pkg.PythonFileDigest{"sha256", "LtPJ1db6mJff2TJSJWKi7rBpzjPS3mSOrjC9zRhoD3A"}, Size: "30049"},
},
TopLevelPackages: []string{"requests"},
RequiresPython: ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
ProvidesExtra: []string{"security", "socks"},
},
},
},
@ -140,6 +149,9 @@ func Test_PackageCataloger(t *testing.T) {
},
TopLevelPackages: []string{"pygments", "something_else"},
DirectURLOrigin: &pkg.PythonDirectURLOriginInfo{URL: "https://github.com/python-test/test.git", VCS: "git", CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},
RequiresPython: ">=3.5",
RequiresDist: []string{"soupsieve (>1.2)", "html5lib ; extra == 'html5lib'", "lxml ; extra == 'lxml'"},
ProvidesExtra: []string{"html5lib", "lxml"},
},
},
},
@ -179,6 +191,7 @@ func Test_PackageCataloger(t *testing.T) {
},
TopLevelPackages: []string{"pygments", "something_else"},
DirectURLOrigin: &pkg.PythonDirectURLOriginInfo{URL: "https://github.com/python-test/test.git", VCS: "git", CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"},
RequiresPython: ">=3.5",
},
},
},
@ -211,6 +224,7 @@ func Test_PackageCataloger(t *testing.T) {
{Path: "../../Scripts/flask.exe", Size: "89470", Digest: &pkg.PythonFileDigest{"sha256", "jvqh4N3qOqXLlq40i6ZOLCY9tAOwfwdzIpLDYhRjoqQ"}},
{Path: "Flask-1.0.2.dist-info/INSTALLER", Size: "4", Digest: &pkg.PythonFileDigest{"sha256", "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"}},
},
RequiresPython: ">=3.5",
},
},
},
@ -236,6 +250,7 @@ func Test_PackageCataloger(t *testing.T) {
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
RequiresPython: ">=3.5",
},
},
},
@ -259,6 +274,8 @@ func Test_PackageCataloger(t *testing.T) {
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures",
RequiresPython: ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
ProvidesExtra: []string{"security", "socks"},
},
},
},
@ -362,7 +379,155 @@ func Test_PackageCataloger_Globs(t *testing.T) {
pkgtest.NewCatalogTester().
FromDirectory(t, test.fixture).
ExpectsResolverContentQueries(test.expected).
IgnoreUnfulfilledPathResponses("**/pyvenv.cfg").
TestCataloger(t, NewInstalledPackageCataloger())
})
}
}
func Test_PackageCataloger_SitePackageRelationships(t *testing.T) {
tests := []struct {
name string
fixture string
expectedRelationships []string
}{
{
name: "with multiple python installations and virtual envs",
fixture: "image-multi-site-package",
expectedRelationships: []string{
// purely python 3.9 dist-packages
//
// in the container, you can get a sense for dependencies with :
// $ python3.9 -m pip list | tail -n +3 | awk '{print $1}' | xargs python3.9 -m pip show | grep -e 'Name:' -e 'Requires:' -e '\-\-\-'
//
// which approximates to (all in system packages):
//
// - beautifulsoup4: soupsieve
// - requests: certifi, chardet, idna, urllib3
// - blessed: six, wcwidth
// - virtualenv: distlib, filelock, platformdirs
"certifi @ 2020.12.5 (/usr/local/lib/python3.9/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.9/dist-packages)",
"certifi @ 2020.12.5 (/usr/local/lib/python3.9/dist-packages) [dependency-of] urllib3 @ 1.26.18 (/usr/local/lib/python3.9/dist-packages)", // available when extra == "secure", but another dependency is primarily installing it
"chardet @ 3.0.4 (/usr/local/lib/python3.9/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.9/dist-packages)",
"idna @ 2.10 (/usr/local/lib/python3.9/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.9/dist-packages)",
"six @ 1.16.0 (/usr/local/lib/python3.9/dist-packages) [dependency-of] blessed @ 1.20.0 (/usr/local/lib/python3.9/dist-packages)",
"soupsieve @ 2.2.1 (/usr/local/lib/python3.9/dist-packages) [dependency-of] beautifulsoup4 @ 4.9.3 (/usr/local/lib/python3.9/dist-packages)",
"urllib3 @ 1.26.18 (/usr/local/lib/python3.9/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.9/dist-packages)",
"wcwidth @ 0.2.13 (/usr/local/lib/python3.9/dist-packages) [dependency-of] blessed @ 1.20.0 (/usr/local/lib/python3.9/dist-packages)",
// purely python 3.8 dist-packages
//
// in the container, you can get a sense for dependencies with :
// $ python3.8 -m pip list | tail -n +3 | awk '{print $1}' | xargs python3.8 -m pip show | grep -e 'Name:' -e 'Requires:' -e '\-\-\-'
//
// which approximates to (all in system packages):
//
// - beautifulsoup4: soupsieve
// - requests: certifi, chardet, idna, urllib3
// - runs: xmod
// - virtualenv: distlib, filelock, platformdirs
"certifi @ 2020.12.5 (/usr/local/lib/python3.8/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.8/dist-packages)",
"certifi @ 2020.12.5 (/usr/local/lib/python3.8/dist-packages) [dependency-of] urllib3 @ 1.26.18 (/usr/local/lib/python3.8/dist-packages)", // available when extra == "secure", but another dependency is primarily installing it
"chardet @ 3.0.4 (/usr/local/lib/python3.8/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.8/dist-packages)",
"idna @ 2.10 (/usr/local/lib/python3.8/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.8/dist-packages)",
"soupsieve @ 2.2 (/usr/local/lib/python3.8/dist-packages) [dependency-of] beautifulsoup4 @ 4.9.2 (/usr/local/lib/python3.8/dist-packages)",
"urllib3 @ 1.26.18 (/usr/local/lib/python3.8/dist-packages) [dependency-of] requests @ 2.25.0 (/usr/local/lib/python3.8/dist-packages)",
"xmod @ 1.8.1 (/usr/local/lib/python3.8/dist-packages) [dependency-of] runs @ 1.2.2 (/usr/local/lib/python3.8/dist-packages)",
// project 1 virtual env
//
// in the container, you can get a sense for dependencies with :
// $ source /app/project1/venv/bin/activate
// $ pip list | tail -n +3 | awk '{print $1}' | xargs pip show | grep -e 'Name:' -e 'Requires:' -e '\-\-\-' -e 'Location:' | grep -A 1 -B 1 '\-packages'
//
// which approximates to (some in virtual env, some in system packages):
//
// - beautifulsoup4: soupsieve
// - requests [SYSTEM]: certifi [SYSTEM], chardet [SYSTEM], idna [SYSTEM], urllib3 [SYSTEM]
// - blessed [SYSTEM]: six [SYSTEM], wcwidth [SYSTEM]
// - virtualenv [SYSTEM]: distlib [SYSTEM], filelock [SYSTEM], platformdirs [SYSTEM]
// - inquirer: python-editor [SYSTEM], blessed [SYSTEM], readchar
//
// Note: we'll only see new relationships, so any relationship where there is at least one new player (in FROM or TO)
"blessed @ 1.20.0 (/usr/local/lib/python3.9/dist-packages) [dependency-of] inquirer @ 3.0.0 (/app/project1/venv/lib/python3.9/site-packages)", // note: depends on global site package!
"python-editor @ 1.0.4 (/usr/local/lib/python3.9/dist-packages) [dependency-of] inquirer @ 3.0.0 (/app/project1/venv/lib/python3.9/site-packages)", // note: depends on global site package!
"readchar @ 4.1.0 (/app/project1/venv/lib/python3.9/site-packages) [dependency-of] inquirer @ 3.0.0 (/app/project1/venv/lib/python3.9/site-packages)",
"soupsieve @ 2.3 (/app/project1/venv/lib/python3.9/site-packages) [dependency-of] beautifulsoup4 @ 4.10.0 (/app/project1/venv/lib/python3.9/site-packages)",
// project 2 virtual env
//
// in the container, you can get a sense for dependencies with :
// $ source /app/project2/venv/bin/activate
// $ pip list | tail -n +3 | awk '{print $1}' | xargs pip show | grep -e 'Name:' -e 'Requires:' -e '\-\-\-' -e 'Location:'
//
// which approximates to (all in virtual env):
//
// - blessed: six, wcwidth
// - editor: runs, xmod
// - runs: xmod
// - inquirer: editor, blessed, readchar
"blessed @ 1.20.0 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] inquirer @ 3.2.4 (/app/project2/venv/lib/python3.8/site-packages)",
"editor @ 1.6.6 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] inquirer @ 3.2.4 (/app/project2/venv/lib/python3.8/site-packages)",
"readchar @ 4.1.0 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] inquirer @ 3.2.4 (/app/project2/venv/lib/python3.8/site-packages)",
"runs @ 1.2.2 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] editor @ 1.6.6 (/app/project2/venv/lib/python3.8/site-packages)",
"six @ 1.16.0 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] blessed @ 1.20.0 (/app/project2/venv/lib/python3.8/site-packages)",
"wcwidth @ 0.2.13 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] blessed @ 1.20.0 (/app/project2/venv/lib/python3.8/site-packages)",
"xmod @ 1.8.1 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] editor @ 1.6.6 (/app/project2/venv/lib/python3.8/site-packages)",
"xmod @ 1.8.1 (/app/project2/venv/lib/python3.8/site-packages) [dependency-of] runs @ 1.2.2 (/app/project2/venv/lib/python3.8/site-packages)",
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
pkgtest.NewCatalogTester().
WithImageResolver(t, test.fixture).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) {
diffRelationships(t, test.expectedRelationships, relationships, pkgs)
}).
TestCataloger(t, NewInstalledPackageCataloger())
})
}
}
func diffRelationships(t *testing.T, expected []string, actual []artifact.Relationship, pkgs []pkg.Package) {
pkgsByID := make(map[artifact.ID]pkg.Package)
for _, p := range pkgs {
pkgsByID[p.ID()] = p
}
sort.Strings(expected)
if d := cmp.Diff(expected, stringRelationships(actual, pkgsByID)); d != "" {
t.Errorf("unexpected relationships (-want, +got): %s", d)
}
}
func stringRelationships(relationships []artifact.Relationship, nameLookup map[artifact.ID]pkg.Package) []string {
var result []string
for _, r := range relationships {
var fromName, toName string
{
fromPkg, ok := nameLookup[r.From.ID()]
if !ok {
fromName = string(r.From.ID())
} else {
loc := path.Dir(path.Dir(fromPkg.Locations.ToSlice()[0].RealPath))
fromName = fmt.Sprintf("%s @ %s (%s)", fromPkg.Name, fromPkg.Version, loc)
}
}
{
toPkg, ok := nameLookup[r.To.ID()]
if !ok {
toName = string(r.To.ID())
} else {
loc := path.Dir(path.Dir(toPkg.Locations.ToSlice()[0].RealPath))
toName = fmt.Sprintf("%s @ %s (%s)", toPkg.Name, toPkg.Version, loc)
}
}
result = append(result, fromName+" ["+string(r.Type)+"] "+toName)
}
sort.Strings(result)
return result
}


@ -0,0 +1,182 @@
package python
import (
"context"
"fmt"
"path"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
)
var _ dependency.Specifier = wheelEggDependencySpecifier
func wheelEggDependencySpecifier(p pkg.Package) dependency.Specification {
meta, ok := p.Metadata.(pkg.PythonPackage)
if !ok {
log.Tracef("cataloger failed to extract wheel/egg metadata for package %+v", p.Name)
return dependency.Specification{}
}
provides := []string{p.Name}
var requires []string
// extract dependencies from the Requires-Dist field
// note: this also includes Extras, which are currently partially supported.
// Specifically, we claim that a package needs all extra dependencies and a relationship will be created
// if that dependency happens to be installed. We currently do not do any version constraint resolution
// or similar behaviors to ensure what is installed will function correctly. This is somewhat consistent with
// how extras function, where there tends to be a try/except around imports as an indication if that extra
// functionality should be executed or not (there isn't a package declaration to reference at runtime).
for _, depSpecifier := range meta.RequiresDist {
depSpecifier = extractPackageNameFromRequiresDest(depSpecifier)
if depSpecifier == "" {
continue
}
requires = append(requires, depSpecifier)
}
return dependency.Specification{
Provides: provides,
Requires: requires,
}
}
// extractPackageNameFromRequiresDest removes any extras or version constraints from a given Requires-Dist field value,
// leaving only the package name.
func extractPackageNameFromRequiresDest(s string) string {
// examples:
// html5lib ; extra == 'html5lib' --> html5lib
// soupsieve (>1.2) --> soupsieve
return strings.TrimSpace(internal.SplitAny(s, "(;")[0])
}
func wheelEggRelationships(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
if err != nil {
return pkgs, rels, err
}
pkgsBySitePackageAndName := make(map[string]map[string]pkg.Package)
for _, p := range pkgs {
sitePackagesDir := deriveSitePackageDir(p)
if pkgsBySitePackageAndName[sitePackagesDir] == nil {
pkgsBySitePackageAndName[sitePackagesDir] = make(map[string]pkg.Package)
}
pkgsBySitePackageAndName[sitePackagesDir][p.Name] = p
}
var sitePackagesDirs []string
for site := range pkgsBySitePackageAndName {
sitePackagesDirs = append(sitePackagesDirs, site)
}
venvs, globalSitePackages, err := findVirtualEnvs(ctx, resolver, sitePackagesDirs)
if err != nil {
return nil, nil, err
}
relationshipsProcessor := dependency.Processor(wheelEggDependencySpecifier)
relationshipIndex := relationship.NewIndex(rels...)
// create relationships between packages within each global site package directory
for _, globalSitePackage := range globalSitePackages {
sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{globalSitePackage})
_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
if err != nil {
return nil, nil, fmt.Errorf("failed to resolve relationships for global site package %q: %w", globalSitePackage, err)
}
relationshipIndex.AddAll(siteRels...)
}
// create relationships between packages within each virtual env site package directory (that doesn't link to a global site-packages directory)
for _, venv := range venvs {
if venv.IncludeSystemSitePackages {
continue
}
sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath})
_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
if err != nil {
return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv site package %q: %w", venv.SitePackagesPath, err)
}
relationshipIndex.AddAll(siteRels...)
}
// create relationships between packages within each virtual env site package directory (that links to a global site package directory)
for _, venv := range venvs {
if !venv.IncludeSystemSitePackages {
continue
}
globalSitePackage := venv.matchSystemPackagesPath(globalSitePackages)
sitePkgs := collectPackages(pkgsBySitePackageAndName, []string{venv.SitePackagesPath, globalSitePackage})
_, siteRels, err := relationshipsProcessor(sitePkgs, nil, nil)
if err != nil {
return nil, nil, fmt.Errorf("failed to resolve relationships for virtualenv + global site package path %q + %q: %w", venv.SitePackagesPath, globalSitePackage, err)
}
relationshipIndex.AddAll(siteRels...)
}
return pkgs, relationshipIndex.AllUniqueRelationships(), err
}
func collectPackages(pkgsBySitePackageAndName map[string]map[string]pkg.Package, sites []string) []pkg.Package {
// get packages for all sites, preferring packages from earlier sites for packages with the same name
pkgByName := make(map[string]struct{})
var pkgs []pkg.Package
for _, site := range sites {
for name, p := range pkgsBySitePackageAndName[site] {
if _, ok := pkgByName[name]; !ok {
pkgByName[name] = struct{}{}
pkgs = append(pkgs, p)
}
}
}
return pkgs
}
func deriveSitePackageDir(p pkg.Package) string {
for _, l := range packagePrimaryLocations(p) {
sitePackageDir := extractSitePackageDir(l.RealPath)
if sitePackageDir != "" {
return sitePackageDir
}
}
return ""
}
func packagePrimaryLocations(p pkg.Package) []file.Location {
var locs []file.Location
for _, l := range p.Locations.ToSlice() {
a, ok := l.Annotations[pkg.EvidenceAnnotationKey]
if !ok {
continue
}
if a == pkg.PrimaryEvidenceAnnotation {
locs = append(locs, l)
}
}
return locs
}
func extractSitePackageDir(p string) string {
// walk up the path until we find a site-packages or dist-packages directory
fields := strings.Split(path.Dir(p), "/")
for i := len(fields) - 1; i >= 0; i-- {
if fields[i] == "site-packages" || fields[i] == "dist-packages" {
return path.Join(fields[:i+1]...)
}
}
return ""
}
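
Reviewer note: the name extraction is the load-bearing step for matching Requires-Dist entries to installed package names. A standalone sketch of the same trimming behavior follows, using strings.IndexAny in place of the internal SplitAny helper; the packageNameFromRequiresDist name is hypothetical.

package main

import (
	"fmt"
	"strings"
)

// packageNameFromRequiresDist mirrors the trimming done by extractPackageNameFromRequiresDest:
// keep everything before the first '(' or ';' and trim surrounding whitespace.
func packageNameFromRequiresDist(s string) string {
	if i := strings.IndexAny(s, "(;"); i != -1 {
		s = s[:i]
	}
	return strings.TrimSpace(s)
}

func main() {
	for _, in := range []string{"soupsieve (>1.2)", "html5lib ; extra == 'html5lib'", "numpy (>=1.16.6)"} {
		fmt.Printf("%q -> %q\n", in, packageNameFromRequiresDist(in))
	}
	// "soupsieve (>1.2)"               -> "soupsieve"
	// "html5lib ; extra == 'html5lib'" -> "html5lib"
	// "numpy (>=1.16.6)"               -> "numpy"
}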


@ -0,0 +1 @@
package python


@ -194,7 +194,7 @@ func assembleEggOrWheelMetadata(resolver file.Resolver, metadataLocation file.Lo
}
defer internal.CloseAndLogError(metadataContents, metadataLocation.AccessPath)
pd, err := parseWheelOrEggMetadata(metadataLocation.RealPath, metadataContents)
pd, err := parseWheelOrEggMetadata(file.NewLocationReadCloser(metadataLocation, metadataContents))
if err != nil {
return nil, nil, err
}


@ -3,7 +3,6 @@ package python
import (
"bufio"
"fmt"
"io"
"path/filepath"
"strings"
@ -23,13 +22,44 @@ type parsedData struct {
pkg.PythonPackage `mapstructure:",squash"`
}
var pluralFields = map[string]bool{
"ProvidesExtra": true,
"RequiresDist": true,
}
// parseWheelOrEggMetadata takes a Python Egg or Wheel (which share the same format and values for our purposes),
// returning all Python packages listed.
func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error) {
fields := make(map[string]string)
func parseWheelOrEggMetadata(locationReader file.LocationReadCloser) (parsedData, error) {
fields, err := extractRFC5322Fields(locationReader)
if err != nil {
return parsedData{}, fmt.Errorf("unable to extract python wheel/egg metadata: %w", err)
}
var pd parsedData
if err := mapstructure.Decode(fields, &pd); err != nil {
return pd, fmt.Errorf("unable to translate python wheel/egg metadata: %w", err)
}
// add additional metadata not stored in the egg/wheel metadata file
path := locationReader.Path()
pd.SitePackagesRootPath = determineSitePackagesRootPath(path)
if pd.Licenses != "" || pd.LicenseExpression != "" {
pd.LicenseLocation = file.NewLocation(path)
} else if pd.LicenseFile != "" {
pd.LicenseLocation = file.NewLocation(filepath.Join(filepath.Dir(path), pd.LicenseFile))
}
return pd, nil
}
func extractRFC5322Fields(locationReader file.LocationReadCloser) (map[string]any, error) {
fields := make(map[string]any)
var key string
scanner := bufio.NewScanner(reader)
// though this spec is governed by RFC 5322 (mail message), the metadata files are not guaranteed to be compliant.
// We must survive parsing as much info as possible without failing and dropping the data.
scanner := bufio.NewScanner(locationReader)
for scanner.Scan() {
line := scanner.Text()
line = strings.TrimRight(line, "\n")
@ -52,43 +82,50 @@ func parseWheelOrEggMetadata(path string, reader io.Reader) (parsedData, error)
// a field-body continuation
updatedValue, err := handleFieldBodyContinuation(key, line, fields)
if err != nil {
return parsedData{}, err
return nil, err
}
fields[key] = updatedValue
default:
// parse a new key (note, duplicate keys are overridden)
// parse a new key (note, duplicate keys for singular fields are overridden, whereas plural fields are appended)
if i := strings.Index(line, ":"); i > 0 {
// mapstruct cannot map keys with dashes, and we are expected to persist the "Author-email" field
key = strings.ReplaceAll(strings.TrimSpace(line[0:i]), "-", "")
val := strings.TrimSpace(line[i+1:])
val := getFieldType(key, strings.TrimSpace(line[i+1:]))
fields[key] = val
fields[key] = handleSingleOrMultiField(fields[key], val)
} else {
log.Warnf("cannot parse field from path: %q from line: %q", path, line)
log.Warnf("cannot parse field from path: %q from line: %q", locationReader.Path(), line)
}
}
}
return fields, nil
}
if err := scanner.Err(); err != nil {
return parsedData{}, fmt.Errorf("failed to parse python wheel/egg: %w", err)
func handleSingleOrMultiField(existingValue, val any) any {
strSlice, ok := val.([]string)
if !ok {
return val
}
if existingValue == nil {
return strSlice
}
var pd parsedData
if err := mapstructure.Decode(fields, &pd); err != nil {
return pd, fmt.Errorf("unable to parse APK metadata: %w", err)
switch existingValueTy := existingValue.(type) {
case []string:
return append(existingValueTy, strSlice...)
case string:
return append([]string{existingValueTy}, strSlice...)
}
// add additional metadata not stored in the egg/wheel metadata file
pd.SitePackagesRootPath = determineSitePackagesRootPath(path)
if pd.Licenses != "" || pd.LicenseExpression != "" {
pd.LicenseLocation = file.NewLocation(path)
} else if pd.LicenseFile != "" {
pd.LicenseLocation = file.NewLocation(filepath.Join(filepath.Dir(path), pd.LicenseFile))
return append([]string{fmt.Sprintf("%s", existingValue)}, strSlice...)
}
return pd, nil
func getFieldType(key, in string) any {
if plural, ok := pluralFields[key]; ok && plural {
return []string{in}
}
return in
}
// isEggRegularFile determines if the specified path is the regular file variant
@ -110,7 +147,7 @@ func determineSitePackagesRootPath(path string) string {
// handleFieldBodyContinuation returns the updated value for the specified field after processing the specified line.
// If the continuation cannot be processed, it returns an error.
func handleFieldBodyContinuation(key, line string, fields map[string]string) (string, error) {
func handleFieldBodyContinuation(key, line string, fields map[string]any) (any, error) {
if len(key) == 0 {
return "", fmt.Errorf("no match for continuation: line: '%s'", line)
}
@ -121,5 +158,16 @@ func handleFieldBodyContinuation(key, line string, fields map[string]string) (st
}
// concatenate onto previous value
return fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)), nil
switch s := val.(type) {
case string:
return fmt.Sprintf("%s\n %s", s, strings.TrimSpace(line)), nil
case []string:
if len(s) == 0 {
s = append(s, "")
}
s[len(s)-1] = fmt.Sprintf("%s\n %s", s[len(s)-1], strings.TrimSpace(line))
return s, nil
default:
return "", fmt.Errorf("unexpected type for continuation: %T", val)
}
}
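// For illustration only, a minimal sketch of how an indented continuation line is folded onto
// the previous value (assuming the key already holds a string value in the fields map):
//
//	fields := map[string]any{"License": "MIT License"}
//	updated, err := handleFieldBodyContinuation("License", "        see LICENSE.txt", fields)
//	// err == nil; updated == "MIT License\n see LICENSE.txt"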

View File

@ -1,11 +1,17 @@
package python
import (
"io"
"os"
"strings"
"testing"
"github.com/go-test/deep"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/internal/cmptest"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
@ -29,6 +35,8 @@ func TestParseWheelEggMetadata(t *testing.T) {
Author: "Kenneth Reitz",
AuthorEmail: "me@kennethreitz.org",
SitePackagesRootPath: "test-fixtures",
RequiresPython: ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
ProvidesExtra: []string{"security", "socks"},
},
},
},
@ -46,6 +54,9 @@ func TestParseWheelEggMetadata(t *testing.T) {
Author: "Georg Brandl",
AuthorEmail: "georg@python.org",
SitePackagesRootPath: "test-fixtures",
RequiresPython: ">=3.5",
RequiresDist: []string{"soupsieve (>1.2)", "html5lib ; extra == 'html5lib'", "lxml ; extra == 'lxml'"},
ProvidesExtra: []string{"html5lib", "lxml"},
},
},
},
@ -58,13 +69,15 @@ func TestParseWheelEggMetadata(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(test.Fixture, fixture)
l := file.NewLocationReadCloser(file.NewLocation(test.Fixture), fixture)
actual, err := parseWheelOrEggMetadata(l)
if err != nil {
t.Fatalf("failed to parse: %+v", err)
}
for _, d := range deep.Equal(actual, test.ExpectedMetadata) {
t.Errorf("diff: %+v", d)
if d := cmp.Diff(test.ExpectedMetadata, actual, cmptest.DefaultCommonOptions()...); d != "" {
t.Errorf("metadata mismatch (-want +got):\n%s", d)
}
})
}
@ -158,7 +171,9 @@ func TestParseWheelEggMetadataInvalid(t *testing.T) {
t.Fatalf("failed to open fixture: %+v", err)
}
actual, err := parseWheelOrEggMetadata(test.Fixture, fixture)
l := file.NewLocationReadCloser(file.NewLocation(test.Fixture), fixture)
actual, err := parseWheelOrEggMetadata(l)
if err != nil {
t.Fatalf("failed to parse: %+v", err)
}
@ -169,3 +184,59 @@ func TestParseWheelEggMetadataInvalid(t *testing.T) {
})
}
}
func Test_extractRFC5322Fields(t *testing.T) {
tests := []struct {
name string
input string
want map[string]any
wantErr require.ErrorAssertionFunc
}{
{
name: "with valid plural fields",
input: `
Name: mxnet
Version: 1.8.0
Requires-Dist: numpy (>=1.16.6)
Requires-Dist: requests (>=2.22.0)
Provides-Extra: cryptoutils ; extra == 'secure'
Provides-Extra: socks ; extra == 'secure'
`,
want: map[string]any{
"Name": "mxnet",
"Version": "1.8.0",
"RequiresDist": []string{"numpy (>=1.16.6)", "requests (>=2.22.0)"},
"ProvidesExtra": []string{"cryptoutils ; extra == 'secure'", "socks ; extra == 'secure'"},
},
},
{
name: "with invalid plural fields (overwrite)",
input: `
Name: mxnet
Version: 1.8.0
Version: 1.9.0
`,
want: map[string]any{
"Name": "mxnet",
"Version": "1.9.0",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.wantErr == nil {
tt.wantErr = require.NoError
}
reader := file.NewLocationReadCloser(
file.NewLocation("/made/up"),
io.NopCloser(strings.NewReader(tt.input)),
)
got, err := extractRFC5322Fields(reader)
tt.wantErr(t, err)
assert.Equal(t, tt.want, got)
})
}
}

View File

@ -25,6 +25,12 @@ Classifier: Operating System :: OS Independent
Classifier: Topic :: Text Processing :: Filters
Classifier: Topic :: Utilities
Requires-Python: >=3.5
Description-Content-Type: text/markdown
Requires-Dist: soupsieve (>1.2)
Provides-Extra: html5lib
Requires-Dist: html5lib ; extra == 'html5lib'
Provides-Extra: lxml
Requires-Dist: lxml ; extra == 'lxml'
Pygments

View File

@ -0,0 +1,37 @@
# digest is for linux/amd64
FROM ubuntu:20.04@sha256:cc9cc8169c9517ae035cf293b15f06922cb8c6c864d625a72b7b18667f264b70
# install Python 3.8 and Python 3.9
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y python3.8 python3.9 python3-pip python3-venv python3.9-venv python3.8-venv
# install pip and virtualenv for both Python versions
RUN python3.8 -m pip install --upgrade pip virtualenv
RUN python3.9 -m pip install --upgrade pip virtualenv
# install global packages for Python 3.8 & 3.9
RUN python3.9 -m pip install click==8.0.3 beautifulsoup4==4.9.3 soupsieve==2.2.1 requests==2.25.1
RUN python3.9 -m pip install six==1.16.0 wcwidth==0.2.13 blessed==1.20.0 python-editor==1.0.4 # total dependencies for inquirer in project1 (which is linked)
RUN python3.9 -m pip install requests==2.25.0 certifi==2020.12.5 chardet==3.0.4 idna==2.10 urllib3==1.26.18 # total dependencies for requests
RUN python3.8 -m pip install click==8.0.2 beautifulsoup4==4.9.2 soupsieve==2.2.0 requests==2.25.0
RUN python3.8 -m pip install runs==1.2.2 xmod==1.8.1 # partial dependencies for inquirer in project2 (which is a red herring)
RUN python3.8 -m pip install requests==2.25.0 certifi==2020.12.5 chardet==3.0.4 idna==2.10 urllib3==1.26.18 # total dependencies for requests
RUN python3.8 -m pip install readchar==4.1.0
# create directories for the two projects
RUN mkdir -p /app/project1 /app/project2
# set up the first project with a virtual environment using Python 3.9
WORKDIR /app/project1
RUN python3.9 -m venv --system-site-packages venv
RUN /app/project1/venv/bin/pip install pyyaml==5.4.1 beautifulsoup4==4.10.0 soupsieve==2.3.0 requests # note: use requests from global site packages, but use the rest from the virtual environment
RUN /app/project1/venv/bin/pip install inquirer==3.0.0 # note: should use dependencies from global site packages
# set up the second project with a virtual environment using Python 3.8
WORKDIR /app/project2
RUN python3.8 -m venv venv
RUN /app/project2/venv/bin/pip install click==8.0.3 pyyaml==6.0
RUN /app/project2/venv/bin/pip install inquirer==3.2.4 runs==1.2.2 xmod==1.8.1 six==1.16.0 wcwidth==0.2.13 blessed==1.20.0 editor==1.6.6 readchar==4.1.0
WORKDIR /app

View File

@ -0,0 +1,3 @@
home = /usr/bin
include-system-site-packages = true
version = 3.9.5

View File

@ -0,0 +1,17 @@
# last one wins... but we should survive multiple entries too
include-system-site-packages = true
include-system-site-packages = true
# note: empty lines
# note: bad entry
include system-site-packages = false
# note: in comment
# include-system-site-packages = false
# note: lots of spaces
version = 3.3.3

View File

@ -0,0 +1,172 @@
package python
import (
"bufio"
"context"
"fmt"
"path"
"sort"
"strings"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
)
type virtualEnvInfo struct {
// Context
Location file.Location
SitePackagesPath string
// Config values
Version string
IncludeSystemSitePackages bool
}
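// majorMinorVersion returns the "major.minor" portion of the configured python version
// (e.g. "3.9.5" -> "3.9"), or an empty string when the version has fewer than two components.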
func (v virtualEnvInfo) majorMinorVersion() string {
parts := strings.Split(v.Version, ".")
if len(parts) < 2 {
return ""
}
return strings.Join(parts[:2], ".")
}
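// findVirtualEnvs locates all pyvenv.cfg files reachable by the resolver, pairs each virtualenv
// with its own site-packages directory (when one of the given paths matches), and returns the
// virtualenvs along with the site-packages paths that no virtualenv claimed.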
func findVirtualEnvs(_ context.Context, resolver file.Resolver, sitePackagePaths []string) ([]virtualEnvInfo, []string, error) {
locations, err := resolver.FilesByGlob("**/pyvenv.cfg")
if err != nil {
return nil, nil, fmt.Errorf("failed to find python virtualenvs: %w", err)
}
sitePackagePathsSet := strset.New(sitePackagePaths...)
var virtualEnvs []virtualEnvInfo
for _, location := range locations {
cfg, err := parsePyvenvCfg(context.Background(), resolver, location)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse pyvenv.cfg: %w", err)
}
if cfg == nil {
continue
}
cfg.SitePackagesPath = cfg.matchVirtualEnvSitePackagesPath(sitePackagePaths)
if cfg.SitePackagesPath != "" {
sitePackagePathsSet.Remove(cfg.SitePackagesPath)
}
virtualEnvs = append(virtualEnvs, *cfg)
}
unusedSitePackageDirs := sitePackagePathsSet.List()
sort.Strings(unusedSitePackageDirs)
return virtualEnvs, unusedSitePackageDirs, nil
}
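// matchSystemPackagesPath returns the candidate site-packages path that looks like the system
// interpreter directory for this virtualenv's python version (matching "**/pythonX.Y/*-packages"),
// or "" when there is not exactly one match.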
func (v virtualEnvInfo) matchSystemPackagesPath(sitePackagePaths []string) string {
sitePackagePathsSet := strset.New(sitePackagePaths...)
// we are searching for the system site-packages directory within the virtualenv
search := "**/python" + v.majorMinorVersion() + "/*-packages"
var matches []string
for _, p := range sitePackagePathsSet.List() {
doesMatch, err := doublestar.Match(search, p)
if err != nil {
log.Tracef("unable to match system site-packages path %q: %v", p, err)
continue
}
if doesMatch {
matches = append(matches, p)
}
}
// we should get either 0 or 1 matches; we cannot reason about multiple matches
if len(matches) == 1 {
return matches[0]
}
return ""
}
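// matchVirtualEnvSitePackagesPath returns the candidate path that lives under this virtualenv's
// own lib/pythonX.Y/site-packages directory, or "" when there is not exactly one match.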
func (v virtualEnvInfo) matchVirtualEnvSitePackagesPath(sitePackagePaths []string) string {
sitePackagePathsSet := strset.New(sitePackagePaths...)
// the parent directory of the venv config is the top-level directory of the virtualenv
// e.g. /app/project1/venv/pyvenv.cfg -> /app/project1/venv
parent := strings.TrimLeft(path.Dir(v.Location.RealPath), "/")
// we are searching for the site-packages directory within the virtualenv
search := parent + "/lib/python" + v.majorMinorVersion() + "/site-packages"
var matches []string
for _, p := range sitePackagePathsSet.List() {
if strings.Contains(p, search) {
matches = append(matches, p)
}
}
// we should get either 0 or 1 matches; we cannot reason about multiple matches
if len(matches) == 1 {
return matches[0]
}
return ""
}
func parsePyvenvCfg(_ context.Context, resolver file.Resolver, location file.Location) (*virtualEnvInfo, error) {
reader, err := resolver.FileContentsByLocation(location)
if err != nil {
return nil, fmt.Errorf("unable to read file %q: %w", location.Path(), err)
}
defer internal.CloseAndLogError(reader, location.Path())
cfg, err := parsePyvenvCfgReader(file.NewLocationReadCloser(location, reader))
if err != nil {
return nil, fmt.Errorf("unable to parse pyvenv.cfg: %w", err)
}
return cfg, nil
}
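// parsePyvenvCfgReader reads a pyvenv.cfg line by line, skipping blank lines, comments, and
// malformed entries, capturing the "version" and "include-system-site-packages" values.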
func parsePyvenvCfgReader(reader file.LocationReadCloser) (*virtualEnvInfo, error) {
scanner := bufio.NewScanner(reader)
venv := virtualEnvInfo{
Location: reader.Location,
}
for scanner.Scan() {
line := scanner.Text()
line = strings.TrimSpace(line)
if line == "" || strings.HasPrefix(line, "#") {
// skip empty lines and comments
continue
}
parts := strings.SplitN(line, "=", 2)
if len(parts) != 2 {
// skip malformed lines
continue
}
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
switch key {
case "version":
venv.Version = value
case "include-system-site-packages":
venv.IncludeSystemSitePackages = strings.ToLower(value) == "true"
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return &venv, nil
}

View File

@ -0,0 +1,54 @@
package python
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
)
func Test_parsePyvenvCfgReader(t *testing.T) {
location := file.NewLocation("/some/bogus/path")
tests := []struct {
name string
fixture string
want *virtualEnvInfo
wantErr require.ErrorAssertionFunc
}{
{
name: "parse basic pyenv file",
fixture: "test-fixtures/pyenv/good-config",
want: &virtualEnvInfo{
Location: location,
Version: "3.9.5",
IncludeSystemSitePackages: true,
},
},
{
name: "trixy config cases",
fixture: "test-fixtures/pyenv/trixy-config",
want: &virtualEnvInfo{
Location: location,
Version: "3.3.3",
IncludeSystemSitePackages: true,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.wantErr == nil {
tt.wantErr = require.NoError
}
reader, err := os.Open(tt.fixture)
require.NoError(t, err)
got, err := parsePyvenvCfgReader(file.NewLocationReadCloser(location, reader))
tt.wantErr(t, err)
assert.Equal(t, tt.want, got)
})
}
}

View File

@ -22,6 +22,9 @@ type PythonPackage struct {
SitePackagesRootPath string `json:"sitePackagesRootPath"`
TopLevelPackages []string `json:"topLevelPackages,omitempty"`
DirectURLOrigin *PythonDirectURLOriginInfo `json:"directUrlOrigin,omitempty"`
RequiresPython string `json:"requiresPython,omitempty" mapstruct:"RequiresPython"`
RequiresDist []string `json:"requiresDist,omitempty" mapstruct:"RequiresDist"`
ProvidesExtra []string `json:"providesExtra,omitempty" mapstruct:"ProvidesExtra"`
}
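// Note: a METADATA key such as "Requires-Dist" is flattened to "RequiresDist" by the parser,
// decoded into RequiresDist via its mapstruct tag, and serialized to JSON as "requiresDist"
// (omitted when empty); RequiresPython and ProvidesExtra follow the same pattern.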
// PythonFileDigest represents the file metadata for a single file attributed to a python package.