mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
Merge pull request #290 from anchore/improve-python-cataloger
Improve performance of the python cataloger
This commit is contained in:
commit
d1d7471f2f
2
.gitignore
vendored
2
.gitignore
vendored
@ -33,3 +33,5 @@ coverage.txt
|
||||
|
||||
# macOS Finder metadata
|
||||
.DS_STORE
|
||||
|
||||
*.profile
|
||||
22
cmd/root.go
22
cmd/root.go
@ -7,11 +7,7 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/anchore/syft/syft/distro"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/pkg/profile"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/anchore"
|
||||
@ -20,8 +16,11 @@ import (
|
||||
"github.com/anchore/syft/internal/ui"
|
||||
"github.com/anchore/syft/internal/version"
|
||||
"github.com/anchore/syft/syft"
|
||||
"github.com/anchore/syft/syft/distro"
|
||||
"github.com/anchore/syft/syft/event"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/presenter"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/docker/docker/api/types"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/client"
|
||||
@ -56,7 +55,20 @@ You can also explicitly specify the scheme to use:
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if appConfig.Dev.ProfileCPU && appConfig.Dev.ProfileMem {
|
||||
log.Errorf("cannot profile CPU and memory simultaneously")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if appConfig.Dev.ProfileCPU {
|
||||
defer profile.Start(profile.CPUProfile).Stop()
|
||||
} else if appConfig.Dev.ProfileMem {
|
||||
defer profile.Start(profile.MemProfile).Stop()
|
||||
}
|
||||
|
||||
err := doRunCmd(cmd, args)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
os.Exit(1)
|
||||
|
||||
1
go.mod
1
go.mod
@ -24,6 +24,7 @@ require (
|
||||
github.com/olekukonko/tablewriter v0.0.4
|
||||
github.com/package-url/packageurl-go v0.1.0
|
||||
github.com/pelletier/go-toml v1.8.0
|
||||
github.com/pkg/profile v1.5.0
|
||||
github.com/scylladb/go-set v1.0.2
|
||||
github.com/sergi/go-diff v1.1.0
|
||||
github.com/sirupsen/logrus v1.6.0
|
||||
|
||||
13
go.sum
13
go.sum
@ -126,7 +126,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
|
||||
github.com/anchore/client-go v0.0.0-20201120223920-9f812673f4d6/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk=
|
||||
github.com/anchore/client-go v0.0.0-20201210022459-59e7a0749c74 h1:9kkKTIyXJC+/syUcY6KWxFoJZJ+GWwrIscF+gBY067k=
|
||||
github.com/anchore/client-go v0.0.0-20201210022459-59e7a0749c74/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk=
|
||||
github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12 h1:xbeIbn5F52JVx3RUIajxCj8b0y+9lywspql4sFhcxWQ=
|
||||
@ -135,14 +134,6 @@ github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0v
|
||||
github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ=
|
||||
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods=
|
||||
github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E=
|
||||
github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409 h1:xKSpDRjmYrEFrdMeDh4AuSUAFc99pdro6YFBKxy2um0=
|
||||
github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
|
||||
github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0 h1:wa0hdnvBeCpI+rmzDbPG7k5SKlsGkot7aZ8Az1i/vws=
|
||||
github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
|
||||
github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7 h1:G3LnRqHL/IIeQZTAMtDOJNYfSYsXLNCZX4DCiS0R0FY=
|
||||
github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E=
|
||||
github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4 h1:XDuCqOWKyQQlKhd9kEDnyKbvSCwShKBDCsyBmD/ALYs=
|
||||
github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM=
|
||||
github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e h1:vHUqHTvH9/oxdDDh1fxS9Ls9gWGytKO7XbbzcQ9MBwI=
|
||||
github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM=
|
||||
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
|
||||
@ -274,6 +265,7 @@ github.com/facebookincubator/nvdtools v0.1.4/go.mod h1:0/FIVnSEl9YHXLq3tKBPpKaI0
|
||||
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
|
||||
github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s=
|
||||
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
|
||||
github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
|
||||
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
|
||||
github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
|
||||
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
|
||||
@ -700,6 +692,8 @@ github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/profile v1.5.0 h1:042Buzk+NhDI+DeSAA62RwJL8VAuZUMQZUjCsRz1Mug=
|
||||
github.com/pkg/profile v1.5.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18=
|
||||
github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
@ -824,6 +818,7 @@ github.com/tetafro/godot v0.4.2/go.mod h1:/7NLHhv08H1+8DNj0MElpAACw1ajsCuf3TKNQx
|
||||
github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk=
|
||||
github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94 h1:ig99OeTyDwQWhPe2iw9lwfQVF1KB3Q4fpP3X7/2VBG8=
|
||||
github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk=
|
||||
github.com/tj/assert v0.0.0-20171129193455-018094318fb0 h1:Rw8kxzWo1mr6FSaYXjQELRe88y2KdfynXdnK72rdjtA=
|
||||
github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0=
|
||||
github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0=
|
||||
github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao=
|
||||
|
||||
@ -17,14 +17,15 @@ import (
|
||||
|
||||
// Application is the main syft application configuration.
|
||||
type Application struct {
|
||||
ConfigPath string `yaml:",omitempty"` // the location where the application config was read from (either from -c or discovered while loading)
|
||||
PresenterOpt presenter.Option `yaml:"-"` // -o, the native Presenter.Option to use for report formatting
|
||||
Output string `yaml:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting
|
||||
ScopeOpt source.Scope `yaml:"-"` // -s, the native source.Scope option to use for how to catalog the container image
|
||||
Scope string `yaml:"scope" mapstructure:"scope"` // -s, the source.Scope string hint for how to catalog the container image
|
||||
Quiet bool `yaml:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI)
|
||||
Log logging `yaml:"log" mapstructure:"log"` // all logging-related options
|
||||
CliOptions CliOnlyOptions `yaml:"-"` // all options only available through the CLI (not via env vars or config)
|
||||
ConfigPath string `yaml:",omitempty"` // the location where the application config was read from (either from -c or discovered while loading)
|
||||
PresenterOpt presenter.Option `yaml:"-"` // -o, the native Presenter.Option to use for report formatting
|
||||
Output string `yaml:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting
|
||||
ScopeOpt source.Scope `yaml:"-"` // -s, the native source.Scope option to use for how to catalog the container image
|
||||
Scope string `yaml:"scope" mapstructure:"scope"` // -s, the source.Scope string hint for how to catalog the container image
|
||||
Quiet bool `yaml:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI)
|
||||
Log logging `yaml:"log" mapstructure:"log"` // all logging-related options
|
||||
CliOptions CliOnlyOptions `yaml:"-"` // all options only available through the CLI (not via env vars or config)
|
||||
Dev Development `mapstructure:"dev"`
|
||||
CheckForAppUpdate bool `yaml:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
|
||||
Anchore anchore `yaml:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise
|
||||
}
|
||||
@ -53,6 +54,11 @@ type anchore struct {
|
||||
Dockerfile string `yaml:"dockerfile" mapstructure:"dockerfile"` // -d , dockerfile to attach for upload
|
||||
}
|
||||
|
||||
// Development groups developer-only switches read from the "dev" section of the application configuration.
// The yaml keys mirror the mapstructure keys so that a serialized configuration uses the same names
// ("profile-cpu", "profile-mem") that are accepted when the configuration is loaded.
type Development struct {
	ProfileCPU bool `yaml:"profile-cpu" mapstructure:"profile-cpu"` // dev.profile-cpu: enable CPU profiling
	ProfileMem bool `yaml:"profile-mem" mapstructure:"profile-mem"` // dev.profile-mem: enable memory profiling
}
|
||||
|
||||
// LoadApplicationConfig populates the given viper object with application configuration discovered on disk
|
||||
func LoadApplicationConfig(v *viper.Viper, cliOpts CliOnlyOptions, wasHostnameSet bool) (*Application, error) {
|
||||
// the user may not have a config, and this is OK, we can use the default config + default cobra cli values instead
|
||||
@ -216,4 +222,6 @@ func setNonCliDefaultValues(v *viper.Viper) {
|
||||
v.SetDefault("log.file", "")
|
||||
v.SetDefault("log.structured", false)
|
||||
v.SetDefault("check-for-app-update", true)
|
||||
v.SetDefault("dev.profile-cpu", false)
|
||||
v.SetDefault("dev.profile-mem", false)
|
||||
}
|
||||
|
||||
@ -3,11 +3,8 @@ package python
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
@ -32,33 +29,63 @@ func (c *PackageCataloger) Name() string {
|
||||
|
||||
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations.
|
||||
func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
|
||||
// nolint:prealloc
|
||||
var fileMatches []source.Location
|
||||
|
||||
for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} {
|
||||
matches, err := resolver.FilesByGlob(glob)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find files by glob: %s", glob)
|
||||
}
|
||||
fileMatches = append(fileMatches, matches...)
|
||||
entries, err := c.getPackageEntries(resolver)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pkgs []pkg.Package
|
||||
for _, location := range fileMatches {
|
||||
p, err := c.catalogEggOrWheel(resolver, location)
|
||||
var packages []pkg.Package
|
||||
for _, entry := range entries {
|
||||
p, err := c.catalogEggOrWheel(entry)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.Path, err)
|
||||
return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err)
|
||||
}
|
||||
if p != nil {
|
||||
pkgs = append(pkgs, *p)
|
||||
packages = append(packages, *p)
|
||||
}
|
||||
}
|
||||
return pkgs, nil
|
||||
|
||||
return packages, nil
|
||||
}
|
||||
|
||||
// getPackageEntries fetches the contents for all python packages within the given resolver.
|
||||
func (c *PackageCataloger) getPackageEntries(resolver source.Resolver) ([]*packageEntry, error) {
|
||||
var metadataLocations []source.Location
|
||||
|
||||
// find all primary record paths
|
||||
matches, err := resolver.FilesByGlob(eggMetadataGlob, wheelMetadataGlob)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find files by glob: %w", err)
|
||||
}
|
||||
metadataLocations = append(metadataLocations, matches...)
|
||||
|
||||
// for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record
|
||||
requester := source.NewContentRequester()
|
||||
entries := make([]*packageEntry, len(metadataLocations))
|
||||
for i, metadataLocation := range metadataLocations {
|
||||
// build the entry to process (holding only path information)
|
||||
entry := newPackageEntry(resolver, metadataLocation)
|
||||
|
||||
// populate the data onto the requester object
|
||||
requester.Add(&entry.Metadata)
|
||||
if entry.FileRecord != nil {
|
||||
requester.Add(entry.FileRecord)
|
||||
}
|
||||
if entry.TopPackage != nil {
|
||||
requester.Add(entry.TopPackage)
|
||||
}
|
||||
|
||||
// keep track of the entry for later package processing
|
||||
entries[i] = entry
|
||||
}
|
||||
|
||||
// return the set of entries and execute the request for fetching contents
|
||||
return entries, requester.Execute(resolver)
|
||||
}
|
||||
|
||||
// catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents.
|
||||
func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataLocation source.Location) (*pkg.Package, error) {
|
||||
metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataLocation)
|
||||
func (c *PackageCataloger) catalogEggOrWheel(entry *packageEntry) (*pkg.Package, error) {
|
||||
metadata, sources, err := c.assembleEggOrWheelMetadata(entry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -81,26 +108,45 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataL
|
||||
}, nil
|
||||
}
|
||||
|
||||
// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
|
||||
func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
||||
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
|
||||
func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *packageEntry) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
||||
var sources = []source.Location{entry.Metadata.Location}
|
||||
|
||||
metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// attach any python files found for the given wheel/egg installation
|
||||
r, s, err := c.processRecordFiles(entry.FileRecord)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
sources = append(sources, s...)
|
||||
metadata.Files = r
|
||||
|
||||
// attach any top-level package names found for the given wheel/egg installation
|
||||
p, s, err := c.processTopLevelPackages(entry.TopPackage)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
sources = append(sources, s...)
|
||||
metadata.TopLevelPackages = p
|
||||
|
||||
return &metadata, sources, nil
|
||||
}
|
||||
|
||||
// processRecordFiles takes a corresponding RECORD file for the given python package metadata file and returns the set of file records contained.
|
||||
func (c *PackageCataloger) processRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) {
|
||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||
// to reconcile the RECORD path to the same layer (or the next adjacent lower layer).
|
||||
|
||||
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
|
||||
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
|
||||
recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath)
|
||||
|
||||
if recordRef != nil {
|
||||
sources = append(sources, *recordRef)
|
||||
|
||||
recordContents, err := resolver.FileContentsByLocation(*recordRef)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if entry != nil {
|
||||
sources = append(sources, entry.Location)
|
||||
|
||||
// parse the record contents
|
||||
records, err := parseWheelOrEggRecord(strings.NewReader(recordContents))
|
||||
records, err := parseWheelOrEggRecord(strings.NewReader(entry.Contents))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@ -110,26 +156,15 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLo
|
||||
return files, sources, nil
|
||||
}
|
||||
|
||||
// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
|
||||
func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) {
|
||||
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
|
||||
parentDir := filepath.Dir(metadataLocation.Path)
|
||||
topLevelPath := filepath.Join(parentDir, "top_level.txt")
|
||||
topLevelRef := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
|
||||
|
||||
if topLevelRef == nil {
|
||||
log.Warnf("missing python package top_level.txt (package=%q)", metadataLocation.Path)
|
||||
// processTopLevelPackages takes a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained.
|
||||
func (c *PackageCataloger) processTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) {
|
||||
if entry == nil {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
sources = append(sources, *topLevelRef)
|
||||
sources = append(sources, entry.Location)
|
||||
|
||||
topLevelContents, err := resolver.FileContentsByLocation(*topLevelRef)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(strings.NewReader(topLevelContents))
|
||||
scanner := bufio.NewScanner(strings.NewReader(entry.Contents))
|
||||
for scanner.Scan() {
|
||||
pkgs = append(pkgs, scanner.Text())
|
||||
}
|
||||
@ -140,36 +175,3 @@ func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metad
|
||||
|
||||
return pkgs, sources, nil
|
||||
}
|
||||
|
||||
// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from.
|
||||
func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.Resolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) {
|
||||
var sources = []source.Location{metadataLocation}
|
||||
|
||||
metadataContents, err := resolver.FileContentsByLocation(metadataLocation)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
metadata, err := parseWheelOrEggMetadata(metadataLocation.Path, strings.NewReader(metadataContents))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// attach any python files found for the given wheel/egg installation
|
||||
r, s, err := c.fetchRecordFiles(resolver, metadataLocation)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
sources = append(sources, s...)
|
||||
metadata.Files = r
|
||||
|
||||
// attach any top-level package names found for the given wheel/egg installation
|
||||
p, s, err := c.fetchTopLevelPackages(resolver, metadataLocation)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
sources = append(sources, s...)
|
||||
metadata.TopLevelPackages = p
|
||||
|
||||
return &metadata, sources, nil
|
||||
}
|
||||
|
||||
@ -8,12 +8,15 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/anchore/syft/internal/file"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/go-test/deep"
|
||||
)
|
||||
|
||||
// TODO: make this generic (based on maps of source.FileData) and make a generic mock to move to the source pkg
|
||||
type pythonTestResolverMock struct {
|
||||
metadataReader io.Reader
|
||||
recordReader io.Reader
|
||||
@ -68,21 +71,21 @@ func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMo
|
||||
}
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (string, error) {
|
||||
func (r *pythonTestResolverMock) FileContentsByLocation(location source.Location) (string, error) {
|
||||
switch {
|
||||
case r.topLevelRef != nil && ref.Path == r.topLevelRef.Path:
|
||||
case r.topLevelRef != nil && location.Path == r.topLevelRef.Path:
|
||||
b, err := ioutil.ReadAll(r.topLevelReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
case ref.Path == r.metadataRef.Path:
|
||||
case location.Path == r.metadataRef.Path:
|
||||
b, err := ioutil.ReadAll(r.metadataReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
case ref.Path == r.recordRef.Path:
|
||||
case location.Path == r.recordRef.Path:
|
||||
b, err := ioutil.ReadAll(r.recordReader)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@ -92,16 +95,36 @@ func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (st
|
||||
return "", fmt.Errorf("invalid value given")
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(_ []source.Location) (map[source.Location]string, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
func (r *pythonTestResolverMock) MultipleFileContentsByLocation(locations []source.Location) (map[source.Location]string, error) {
|
||||
var results = make(map[source.Location]string)
|
||||
var err error
|
||||
for _, l := range locations {
|
||||
results[l], err = r.FileContentsByLocation(l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
func (r *pythonTestResolverMock) FilesByGlob(patterns ...string) ([]source.Location, error) {
|
||||
var results []source.Location
|
||||
for _, pattern := range patterns {
|
||||
for _, l := range []*source.Location{r.topLevelRef, r.metadataRef, r.recordRef} {
|
||||
if l == nil {
|
||||
continue
|
||||
}
|
||||
if file.GlobMatch(pattern, l.Path) {
|
||||
results = append(results, *l)
|
||||
}
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location {
|
||||
switch {
|
||||
@ -224,14 +247,16 @@ func TestPythonPackageWheelCataloger(t *testing.T) {
|
||||
}
|
||||
// end patching expected values with runtime data...
|
||||
|
||||
pyPkgCataloger := NewPythonPackageCataloger()
|
||||
|
||||
actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef)
|
||||
actual, err := NewPythonPackageCataloger().Catalog(resolver)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to catalog python package: %+v", err)
|
||||
}
|
||||
|
||||
for _, d := range deep.Equal(actual, &test.ExpectedPackage) {
|
||||
if len(actual) != 1 {
|
||||
t.Fatalf("unexpected length: %d", len(actual))
|
||||
}
|
||||
|
||||
for _, d := range deep.Equal(actual[0], test.ExpectedPackage) {
|
||||
t.Errorf("diff: %+v", d)
|
||||
}
|
||||
})
|
||||
|
||||
49
syft/cataloger/python/package_entry.go
Normal file
49
syft/cataloger/python/package_entry.go
Normal file
@ -0,0 +1,49 @@
|
||||
package python
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
type packageEntry struct {
|
||||
Metadata source.FileData
|
||||
FileRecord *source.FileData
|
||||
TopPackage *source.FileData
|
||||
}
|
||||
|
||||
// newPackageEntry returns a new packageEntry to be processed relative to what information is available in the given FileResolver.
|
||||
func newPackageEntry(resolver source.FileResolver, metadataLocation source.Location) *packageEntry {
|
||||
// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
|
||||
// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
|
||||
// to reconcile the RECORD path to the same layer (or a lower layer). The same is true with the top_level.txt file.
|
||||
|
||||
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
|
||||
recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD")
|
||||
recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath)
|
||||
|
||||
// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
|
||||
parentDir := filepath.Dir(metadataLocation.Path)
|
||||
topLevelPath := filepath.Join(parentDir, "top_level.txt")
|
||||
topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)
|
||||
|
||||
// build an entry that will later be populated with contents when the request is executed
|
||||
entry := &packageEntry{
|
||||
Metadata: source.FileData{
|
||||
Location: metadataLocation,
|
||||
},
|
||||
}
|
||||
|
||||
if recordLocation != nil {
|
||||
entry.FileRecord = &source.FileData{
|
||||
Location: *recordLocation,
|
||||
}
|
||||
}
|
||||
|
||||
if topLevelLocation != nil {
|
||||
entry.TopPackage = &source.FileData{
|
||||
Location: *topLevelLocation,
|
||||
}
|
||||
}
|
||||
return entry
|
||||
}
|
||||
56
syft/source/content_requester.go
Normal file
56
syft/source/content_requester.go
Normal file
@ -0,0 +1,56 @@
|
||||
package source
|
||||
|
||||
import "sync"
|
||||
|
||||
// ContentRequester is an object tailored for taking source.Location objects which file contents will be resolved
|
||||
// upon invoking Execute().
|
||||
type ContentRequester struct {
|
||||
request map[Location][]*FileData
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
// NewContentRequester creates a new ContentRequester object with the given initial request data.
|
||||
func NewContentRequester(data ...*FileData) *ContentRequester {
|
||||
requester := &ContentRequester{
|
||||
request: make(map[Location][]*FileData),
|
||||
}
|
||||
for _, d := range data {
|
||||
requester.Add(d)
|
||||
}
|
||||
return requester
|
||||
}
|
||||
|
||||
// Add appends a new single FileData containing a source.Location to later have the contents fetched and stored within
|
||||
// the given FileData object.
|
||||
func (r *ContentRequester) Add(data *FileData) {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
|
||||
r.request[data.Location] = append(r.request[data.Location], data)
|
||||
}
|
||||
|
||||
// Execute takes the previously provided source.Location's and resolves the file contents, storing the results within
|
||||
// the previously provided FileData objects.
|
||||
func (r *ContentRequester) Execute(resolver ContentResolver) error {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
|
||||
var locations = make([]Location, len(r.request))
|
||||
idx := 0
|
||||
for l := range r.request {
|
||||
locations[idx] = l
|
||||
idx++
|
||||
}
|
||||
|
||||
response, err := resolver.MultipleFileContentsByLocation(locations)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for l, contents := range response {
|
||||
for i := range r.request[l] {
|
||||
r.request[l][i].Contents = contents
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
69
syft/source/content_requester_test.go
Normal file
69
syft/source/content_requester_test.go
Normal file
@ -0,0 +1,69 @@
|
||||
package source
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/imagetest"
|
||||
"github.com/sergi/go-diff/diffmatchpatch"
|
||||
)
|
||||
|
||||
func TestContentRequester(t *testing.T) {
|
||||
tests := []struct {
|
||||
fixture string
|
||||
expectedContents map[string]string
|
||||
}{
|
||||
{
|
||||
fixture: "image-simple",
|
||||
expectedContents: map[string]string{
|
||||
"/somefile-1.txt": "this file has contents",
|
||||
"/somefile-2.txt": "file-2 contents!",
|
||||
"/really/nested/file-3.txt": "another file!\nwith lines...",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.fixture, func(t *testing.T) {
|
||||
img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-simple")
|
||||
defer cleanup()
|
||||
|
||||
resolver, err := NewAllLayersResolver(img)
|
||||
if err != nil {
|
||||
t.Fatalf("could not create resolver: %+v", err)
|
||||
}
|
||||
|
||||
var data []*FileData
|
||||
for path := range test.expectedContents {
|
||||
|
||||
locations, err := resolver.FilesByPath(path)
|
||||
if err != nil {
|
||||
t.Fatalf("could not build request: %+v", err)
|
||||
}
|
||||
if len(locations) != 1 {
|
||||
t.Fatalf("bad resolver paths: %+v", locations)
|
||||
}
|
||||
|
||||
data = append(data, &FileData{
|
||||
Location: locations[0],
|
||||
})
|
||||
}
|
||||
|
||||
if err := NewContentRequester(data...).Execute(resolver); err != nil {
|
||||
t.Fatalf("could not execute request: %+v", err)
|
||||
}
|
||||
|
||||
for _, entry := range data {
|
||||
if expected, ok := test.expectedContents[entry.Location.Path]; ok {
|
||||
for expected != entry.Contents {
|
||||
t.Errorf("mismatched contents for %q", entry.Location.Path)
|
||||
dmp := diffmatchpatch.New()
|
||||
diffs := dmp.DiffMain(expected, entry.Contents, true)
|
||||
t.Errorf("diff: %s", dmp.DiffPrettyText(diffs))
|
||||
}
|
||||
continue
|
||||
}
|
||||
t.Errorf("could not find %q", entry.Location.Path)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -180,7 +180,7 @@ func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) {
|
||||
|
||||
func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) {
|
||||
t.Run("finds multiple matching files", func(t *testing.T) {
|
||||
resolver := DirectoryResolver{"test-fixtures"}
|
||||
resolver := DirectoryResolver{"test-fixtures/image-symlinks"}
|
||||
refs, err := resolver.FilesByGlob("**/*.txt")
|
||||
|
||||
if err != nil {
|
||||
|
||||
6
syft/source/file_data.go
Normal file
6
syft/source/file_data.go
Normal file
@ -0,0 +1,6 @@
|
||||
package source
|
||||
|
||||
type FileData struct {
|
||||
Location Location
|
||||
Contents string
|
||||
}
|
||||
6
syft/source/test-fixtures/image-simple/Dockerfile
Normal file
6
syft/source/test-fixtures/image-simple/Dockerfile
Normal file
@ -0,0 +1,6 @@
|
||||
# Note: changes to this file will result in updating several test values. Consider making a new image fixture instead of editing this one.
|
||||
FROM scratch
|
||||
ADD file-1.txt /somefile-1.txt
|
||||
ADD file-2.txt /somefile-2.txt
|
||||
# note: adding a directory will behave differently on docker engine v18 vs v19
|
||||
ADD target /
|
||||
1
syft/source/test-fixtures/image-simple/file-1.txt
Normal file
1
syft/source/test-fixtures/image-simple/file-1.txt
Normal file
@ -0,0 +1 @@
|
||||
this file has contents
|
||||
1
syft/source/test-fixtures/image-simple/file-2.txt
Normal file
1
syft/source/test-fixtures/image-simple/file-2.txt
Normal file
@ -0,0 +1 @@
|
||||
file-2 contents!
|
||||
@ -0,0 +1,2 @@
|
||||
another file!
|
||||
with lines...
|
||||
Loading…
x
Reference in New Issue
Block a user