From be5917a058a904099a2457aaf91a4c0dbbd34a8c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 11 Dec 2020 13:54:42 -0500 Subject: [PATCH 1/5] add profiler dev option Signed-off-by: Alex Goodman --- cmd/root.go | 28 ++++++++++++++++++++++------ internal/config/config.go | 22 ++++++++++++++-------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 12c169d95..3652bd0cd 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,14 +5,9 @@ import ( "fmt" "io/ioutil" "os" + "runtime/pprof" "strings" - "github.com/anchore/syft/syft/distro" - - "github.com/anchore/syft/syft/pkg" - - "github.com/anchore/syft/syft/source" - "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/anchore" "github.com/anchore/syft/internal/bus" @@ -20,8 +15,11 @@ import ( "github.com/anchore/syft/internal/ui" "github.com/anchore/syft/internal/version" "github.com/anchore/syft/syft" + "github.com/anchore/syft/syft/distro" "github.com/anchore/syft/syft/event" + "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/presenter" + "github.com/anchore/syft/syft/source" "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" @@ -56,7 +54,25 @@ You can also explicitly specify the scheme to use: } os.Exit(1) } + + if appConfig.Dev.ProfileCPU { + f, err := os.Create("cpu.profile") + if err != nil { + log.Errorf("unable to create CPU profile: %+v", err) + } else { + err := pprof.StartCPUProfile(f) + if err != nil { + log.Errorf("unable to start CPU profile: %+v", err) + } + } + } + err := doRunCmd(cmd, args) + + if appConfig.Dev.ProfileCPU { + pprof.StopCPUProfile() + } + if err != nil { log.Errorf(err.Error()) os.Exit(1) diff --git a/internal/config/config.go b/internal/config/config.go index 991cde1a8..aba34db0a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -17,14 +17,15 @@ import ( // Application is the main syft application configuration. 
type Application struct { - ConfigPath string `yaml:",omitempty"` // the location where the application config was read from (either from -c or discovered while loading) - PresenterOpt presenter.Option `yaml:"-"` // -o, the native Presenter.Option to use for report formatting - Output string `yaml:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting - ScopeOpt source.Scope `yaml:"-"` // -s, the native source.Scope option to use for how to catalog the container image - Scope string `yaml:"scope" mapstructure:"scope"` // -s, the source.Scope string hint for how to catalog the container image - Quiet bool `yaml:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI) - Log logging `yaml:"log" mapstructure:"log"` // all logging-related options - CliOptions CliOnlyOptions `yaml:"-"` // all options only available through the CLI (not via env vars or config) + ConfigPath string `yaml:",omitempty"` // the location where the application config was read from (either from -c or discovered while loading) + PresenterOpt presenter.Option `yaml:"-"` // -o, the native Presenter.Option to use for report formatting + Output string `yaml:"output" mapstructure:"output"` // -o, the Presenter hint string to use for report formatting + ScopeOpt source.Scope `yaml:"-"` // -s, the native source.Scope option to use for how to catalog the container image + Scope string `yaml:"scope" mapstructure:"scope"` // -s, the source.Scope string hint for how to catalog the container image + Quiet bool `yaml:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI) + Log logging `yaml:"log" mapstructure:"log"` // all logging-related options + CliOptions CliOnlyOptions `yaml:"-"` // all options only available through the CLI (not via env vars or config) + Dev Development `mapstructure:"dev"` CheckForAppUpdate bool `yaml:"check-for-app-update" 
mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not Anchore anchore `yaml:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise } @@ -53,6 +54,10 @@ type anchore struct { Dockerfile string `yaml:"dockerfile" mapstructure:"dockerfile"` // -d , dockerfile to attach for upload } +type Development struct { + ProfileCPU bool `mapstructure:"profile-cpu"` +} + // LoadApplicationConfig populates the given viper object with application configuration discovered on disk func LoadApplicationConfig(v *viper.Viper, cliOpts CliOnlyOptions, wasHostnameSet bool) (*Application, error) { // the user may not have a config, and this is OK, we can use the default config + default cobra cli values instead @@ -216,4 +221,5 @@ func setNonCliDefaultValues(v *viper.Viper) { v.SetDefault("log.file", "") v.SetDefault("log.structured", false) v.SetDefault("check-for-app-update", true) + v.SetDefault("dev.profile-cpu", false) } From 82c8a8e17b406d40aaecbd426c37fb734edd0bd6 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 11 Dec 2020 16:14:00 -0500 Subject: [PATCH 2/5] add mem profile option and refactor python cataloger for batch requests Signed-off-by: Alex Goodman --- .gitignore | 2 + cmd/root.go | 24 ++- go.mod | 1 + go.sum | 13 +- internal/config/config.go | 2 + syft/cataloger/python/package_cataloger.go | 188 +++++++++++++-------- 6 files changed, 141 insertions(+), 89 deletions(-) diff --git a/.gitignore b/.gitignore index 2cab71708..3edd0085e 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ coverage.txt # macOS Finder metadata .DS_STORE + +*.profile \ No newline at end of file diff --git a/cmd/root.go b/cmd/root.go index 3652bd0cd..cce4bf0e8 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,9 +5,10 @@ import ( "fmt" "io/ioutil" "os" - "runtime/pprof" "strings" + "github.com/pkg/profile" + "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/anchore" 
"github.com/anchore/syft/internal/bus" @@ -55,24 +56,19 @@ You can also explicitly specify the scheme to use: os.Exit(1) } + if appConfig.Dev.ProfileCPU && appConfig.Dev.ProfileMem { + log.Errorf("cannot profile CPU and memory simultaneously") + os.Exit(1) + } + if appConfig.Dev.ProfileCPU { - f, err := os.Create("cpu.profile") - if err != nil { - log.Errorf("unable to create CPU profile: %+v", err) - } else { - err := pprof.StartCPUProfile(f) - if err != nil { - log.Errorf("unable to start CPU profile: %+v", err) - } - } + defer profile.Start(profile.CPUProfile).Stop() + } else if appConfig.Dev.ProfileMem { + defer profile.Start(profile.MemProfile).Stop() } err := doRunCmd(cmd, args) - if appConfig.Dev.ProfileCPU { - pprof.StopCPUProfile() - } - if err != nil { log.Errorf(err.Error()) os.Exit(1) diff --git a/go.mod b/go.mod index c44e1ef3a..5d1e7e18b 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/olekukonko/tablewriter v0.0.4 github.com/package-url/packageurl-go v0.1.0 github.com/pelletier/go-toml v1.8.0 + github.com/pkg/profile v1.5.0 github.com/scylladb/go-set v1.0.2 github.com/sergi/go-diff v1.1.0 github.com/sirupsen/logrus v1.6.0 diff --git a/go.sum b/go.sum index 37fb9f17d..7aeccfbf9 100644 --- a/go.sum +++ b/go.sum @@ -126,7 +126,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= -github.com/anchore/client-go v0.0.0-20201120223920-9f812673f4d6/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk= github.com/anchore/client-go v0.0.0-20201210022459-59e7a0749c74 h1:9kkKTIyXJC+/syUcY6KWxFoJZJ+GWwrIscF+gBY067k= github.com/anchore/client-go 
v0.0.0-20201210022459-59e7a0749c74/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk= github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12 h1:xbeIbn5F52JVx3RUIajxCj8b0y+9lywspql4sFhcxWQ= @@ -135,14 +134,6 @@ github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0v github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= -github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409 h1:xKSpDRjmYrEFrdMeDh4AuSUAFc99pdro6YFBKxy2um0= -github.com/anchore/stereoscope v0.0.0-20201106140100-12e75c48f409/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E= -github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0 h1:wa0hdnvBeCpI+rmzDbPG7k5SKlsGkot7aZ8Az1i/vws= -github.com/anchore/stereoscope v0.0.0-20201130153727-b3f1fad856b0/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E= -github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7 h1:G3LnRqHL/IIeQZTAMtDOJNYfSYsXLNCZX4DCiS0R0FY= -github.com/anchore/stereoscope v0.0.0-20201203153145-3f9a05a624d7/go.mod h1:2Jja/4l0zYggW52og+nn0rut4i+OYjCf9vTyrM8RT4E= -github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4 h1:XDuCqOWKyQQlKhd9kEDnyKbvSCwShKBDCsyBmD/ALYs= -github.com/anchore/stereoscope v0.0.0-20201203222654-09e79bf5fef4/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM= github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e h1:vHUqHTvH9/oxdDDh1fxS9Ls9gWGytKO7XbbzcQ9MBwI= github.com/anchore/stereoscope v0.0.0-20201210022249-091f9bddb42e/go.mod h1:/dHAFjYflH/1tzhdHAcnMCjprMch+YzHJKi59m/1KCM= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= @@ -274,6 +265,7 @@ 
github.com/facebookincubator/nvdtools v0.1.4/go.mod h1:0/FIVnSEl9YHXLq3tKBPpKaI0 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= +github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA= github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI= github.com/fortytw2/leaktest v1.2.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= @@ -700,6 +692,8 @@ github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/profile v1.5.0 h1:042Buzk+NhDI+DeSAA62RwJL8VAuZUMQZUjCsRz1Mug= +github.com/pkg/profile v1.5.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18= github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -824,6 +818,7 @@ github.com/tetafro/godot v0.4.2/go.mod h1:/7NLHhv08H1+8DNj0MElpAACw1ajsCuf3TKNQx github.com/timakin/bodyclose v0.0.0-20190930140734-f7f2e9bca95e/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94 h1:ig99OeTyDwQWhPe2iw9lwfQVF1KB3Q4fpP3X7/2VBG8= github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= +github.com/tj/assert v0.0.0-20171129193455-018094318fb0 
h1:Rw8kxzWo1mr6FSaYXjQELRe88y2KdfynXdnK72rdjtA= github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0= github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0= github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao= diff --git a/internal/config/config.go b/internal/config/config.go index aba34db0a..4a22f283d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -56,6 +56,7 @@ type anchore struct { type Development struct { ProfileCPU bool `mapstructure:"profile-cpu"` + ProfileMem bool `mapstructure:"profile-mem"` } // LoadApplicationConfig populates the given viper object with application configuration discovered on disk @@ -222,4 +223,5 @@ func setNonCliDefaultValues(v *viper.Viper) { v.SetDefault("log.structured", false) v.SetDefault("check-for-app-update", true) v.SetDefault("dev.profile-cpu", false) + v.SetDefault("dev.profile-mem", false) } diff --git a/syft/cataloger/python/package_cataloger.go b/syft/cataloger/python/package_cataloger.go index 0d42a4473..d8ead13e2 100644 --- a/syft/cataloger/python/package_cataloger.go +++ b/syft/cataloger/python/package_cataloger.go @@ -43,22 +43,101 @@ func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, err fileMatches = append(fileMatches, matches...) 
} + request, entries := filesOfInterest(resolver, fileMatches) + if err := getContents(resolver, request); err != nil { + return nil, err + } + var pkgs []pkg.Package - for _, location := range fileMatches { - p, err := c.catalogEggOrWheel(resolver, location) + for _, entry := range entries { + p, err := c.catalogEggOrWheel(entry) if err != nil { - return nil, fmt.Errorf("unable to catalog python package=%+v: %w", location.Path, err) + return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err) } if p != nil { pkgs = append(pkgs, *p) } } + return pkgs, nil } +type FileData struct { + Location source.Location + Contents string +} + +type pythonEntry struct { + Metadata FileData + FileRecord *FileData + TopPackage *FileData +} + +func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) { + var request = make(map[source.Location]*FileData) + var entries []*pythonEntry + for _, metadataLocation := range metadataLocations { + + // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory + // or for an image... for an image the METADATA file may be present within multiple layers, so it is important + // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). 
+ + // lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) + recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD") + recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath) + + // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages + parentDir := filepath.Dir(metadataLocation.Path) + topLevelPath := filepath.Join(parentDir, "top_level.txt") + topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath) + + entry := &pythonEntry{ + Metadata: FileData{ + Location: metadataLocation, + }, + } + + request[entry.Metadata.Location] = &entry.Metadata + + if recordLocation != nil { + entry.FileRecord = &FileData{ + Location: *recordLocation, + } + request[entry.FileRecord.Location] = entry.FileRecord + } + + if topLevelLocation != nil { + entry.TopPackage = &FileData{ + Location: *topLevelLocation, + } + request[entry.TopPackage.Location] = entry.TopPackage + } + entries = append(entries, entry) + + } + return request, entries +} + +func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error { + var locations []source.Location + for l := range request { + locations = append(locations, l) + } + + response, err := resolver.MultipleFileContentsByLocation(locations) + if err != nil { + return err + } + + for l, contents := range response { + request[l].Contents = contents + } + return nil +} + // catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. 
-func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataLocation source.Location) (*pkg.Package, error) { - metadata, sources, err := c.assembleEggOrWheelMetadata(resolver, metadataLocation) +func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) { + metadata, sources, err := c.assembleEggOrWheelMetadata(entry) if err != nil { return nil, err } @@ -81,26 +160,45 @@ func (c *PackageCataloger) catalogEggOrWheel(resolver source.Resolver, metadataL }, nil } +// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. +func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) { + var sources = []source.Location{entry.Metadata.Location} + + metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents)) + if err != nil { + return nil, nil, err + } + + // attach any python files found for the given wheel/egg installation + r, s, err := c.fetchRecordFiles(entry.FileRecord) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.Files = r + + // attach any top-level package names found for the given wheel/egg installation + p, s, err := c.fetchTopLevelPackages(entry.TopPackage) + if err != nil { + return nil, nil, err + } + sources = append(sources, s...) + metadata.TopLevelPackages = p + + return &metadata, sources, nil +} + // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. 
-func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLocation source.Location) (files []pkg.PythonFileRecord, sources []source.Location, err error) { +func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). - // lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) - recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD") - recordRef := resolver.RelativeFileByPath(metadataLocation, recordPath) - - if recordRef != nil { - sources = append(sources, *recordRef) - - recordContents, err := resolver.FileContentsByLocation(*recordRef) - if err != nil { - return nil, nil, err - } + if entry != nil { + sources = append(sources, entry.Location) // parse the record contents - records, err := parseWheelOrEggRecord(strings.NewReader(recordContents)) + records, err := parseWheelOrEggRecord(strings.NewReader(entry.Contents)) if err != nil { return nil, nil, err } @@ -111,25 +209,16 @@ func (c *PackageCataloger) fetchRecordFiles(resolver source.Resolver, metadataLo } // fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. 
-func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metadataLocation source.Location) (pkgs []string, sources []source.Location, err error) { - // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages - parentDir := filepath.Dir(metadataLocation.Path) - topLevelPath := filepath.Join(parentDir, "top_level.txt") - topLevelRef := resolver.RelativeFileByPath(metadataLocation, topLevelPath) - - if topLevelRef == nil { - log.Warnf("missing python package top_level.txt (package=%q)", metadataLocation.Path) +func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) { + if entry == nil { + // TODO + log.Warnf("missing python package top_level.txt (package=!!)") return nil, nil, nil } - sources = append(sources, *topLevelRef) + sources = append(sources, entry.Location) - topLevelContents, err := resolver.FileContentsByLocation(*topLevelRef) - if err != nil { - return nil, nil, err - } - - scanner := bufio.NewScanner(strings.NewReader(topLevelContents)) + scanner := bufio.NewScanner(strings.NewReader(entry.Contents)) for scanner.Scan() { pkgs = append(pkgs, scanner.Text()) } @@ -140,36 +229,3 @@ func (c *PackageCataloger) fetchTopLevelPackages(resolver source.Resolver, metad return pkgs, sources, nil } - -// assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. 
-func (c *PackageCataloger) assembleEggOrWheelMetadata(resolver source.Resolver, metadataLocation source.Location) (*pkg.PythonPackageMetadata, []source.Location, error) { - var sources = []source.Location{metadataLocation} - - metadataContents, err := resolver.FileContentsByLocation(metadataLocation) - if err != nil { - return nil, nil, err - } - - metadata, err := parseWheelOrEggMetadata(metadataLocation.Path, strings.NewReader(metadataContents)) - if err != nil { - return nil, nil, err - } - - // attach any python files found for the given wheel/egg installation - r, s, err := c.fetchRecordFiles(resolver, metadataLocation) - if err != nil { - return nil, nil, err - } - sources = append(sources, s...) - metadata.Files = r - - // attach any top-level package names found for the given wheel/egg installation - p, s, err := c.fetchTopLevelPackages(resolver, metadataLocation) - if err != nil { - return nil, nil, err - } - sources = append(sources, s...) - metadata.TopLevelPackages = p - - return &metadata, sources, nil -} From e4a3e433b60186b1e1d77f5d9aa88a26a51b8840 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 11 Dec 2020 17:10:45 -0500 Subject: [PATCH 3/5] add content requested and refactor python cataloger to use it Signed-off-by: Alex Goodman --- syft/cataloger/python/package_cataloger.go | 102 +++++++----------- .../python/package_cataloger_test.go | 49 ++++++--- syft/source/content_requester.go | 48 +++++++++ syft/source/file_data.go | 6 ++ 4 files changed, 132 insertions(+), 73 deletions(-) create mode 100644 syft/source/content_requester.go create mode 100644 syft/source/file_data.go diff --git a/syft/cataloger/python/package_cataloger.go b/syft/cataloger/python/package_cataloger.go index d8ead13e2..247e305e3 100644 --- a/syft/cataloger/python/package_cataloger.go +++ b/syft/cataloger/python/package_cataloger.go @@ -18,6 +18,12 @@ const ( wheelMetadataGlob = "**/*dist-info/METADATA" ) +type pythonPackageData struct { + Metadata source.FileData + 
FileRecord *source.FileData + TopPackage *source.FileData +} + type PackageCataloger struct{} // NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. @@ -32,55 +38,43 @@ func (c *PackageCataloger) Name() string { // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations. func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { - // nolint:prealloc - var fileMatches []source.Location - - for _, glob := range []string{eggMetadataGlob, wheelMetadataGlob} { - matches, err := resolver.FilesByGlob(glob) - if err != nil { - return nil, fmt.Errorf("failed to find files by glob: %s", glob) - } - fileMatches = append(fileMatches, matches...) - } - - request, entries := filesOfInterest(resolver, fileMatches) - if err := getContents(resolver, request); err != nil { + entries, err := c.getPythonPackageEntries(resolver) + if err != nil { return nil, err } - var pkgs []pkg.Package + var packages []pkg.Package for _, entry := range entries { p, err := c.catalogEggOrWheel(entry) if err != nil { return nil, fmt.Errorf("unable to catalog python package=%+v: %w", entry.Metadata.Location.Path, err) } if p != nil { - pkgs = append(pkgs, *p) + packages = append(packages, *p) } } - return pkgs, nil + return packages, nil } -type FileData struct { - Location source.Location - Contents string -} +func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([]*pythonPackageData, error) { + var metadataLocations []source.Location -type pythonEntry struct { - Metadata FileData - FileRecord *FileData - TopPackage *FileData -} - -func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Location) (map[source.Location]*FileData, []*pythonEntry) { - var request = make(map[source.Location]*FileData) - var entries []*pythonEntry - for _, metadataLocation := range 
metadataLocations { + // find all primary record paths + matches, err := resolver.FilesByGlob(eggMetadataGlob, wheelMetadataGlob) + if err != nil { + return nil, fmt.Errorf("failed to find files by glob: %w", err) + } + metadataLocations = append(metadataLocations, matches...) + // for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record + var requester = source.NewContentRequester() + var entries = make([]*pythonPackageData, len(metadataLocations)) + for i, metadataLocation := range metadataLocations { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important - // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). + // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). The same is true with + // the top_level.txt file. 
// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD") @@ -91,52 +85,39 @@ func filesOfInterest(resolver source.FileResolver, metadataLocations []source.Lo topLevelPath := filepath.Join(parentDir, "top_level.txt") topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath) - entry := &pythonEntry{ - Metadata: FileData{ + // build an entry that will later be populated with contents when the request is executed + entry := &pythonPackageData{ + Metadata: source.FileData{ Location: metadataLocation, }, } - request[entry.Metadata.Location] = &entry.Metadata + requester.Add(&entry.Metadata) if recordLocation != nil { - entry.FileRecord = &FileData{ + entry.FileRecord = &source.FileData{ Location: *recordLocation, } - request[entry.FileRecord.Location] = entry.FileRecord + requester.Add(entry.FileRecord) } if topLevelLocation != nil { - entry.TopPackage = &FileData{ + entry.TopPackage = &source.FileData{ Location: *topLevelLocation, } - request[entry.TopPackage.Location] = entry.TopPackage + requester.Add(entry.TopPackage) } - entries = append(entries, entry) - } - return request, entries -} - -func getContents(resolver source.ContentResolver, request map[source.Location]*FileData) error { - var locations []source.Location - for l := range request { - locations = append(locations, l) + // keep the entry for processing later + entries[i] = entry } - response, err := resolver.MultipleFileContentsByLocation(locations) - if err != nil { - return err - } - - for l, contents := range response { - request[l].Contents = contents - } - return nil + // return the set of entries and execute the request for fetching contents + return entries, requester.Execute(resolver) } // catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. 
-func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, error) { +func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Package, error) { metadata, sources, err := c.assembleEggOrWheelMetadata(entry) if err != nil { return nil, err @@ -161,7 +142,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *pythonEntry) (*pkg.Package, } // assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. -func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg.PythonPackageMetadata, []source.Location, error) { +func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) (*pkg.PythonPackageMetadata, []source.Location, error) { var sources = []source.Location{entry.Metadata.Location} metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents)) @@ -189,7 +170,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonEntry) (*pkg. } // fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. -func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) { +func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). 
@@ -209,9 +190,8 @@ func (c *PackageCataloger) fetchRecordFiles(entry *FileData) (files []pkg.Python } // fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. -func (c *PackageCataloger) fetchTopLevelPackages(entry *FileData) (pkgs []string, sources []source.Location, err error) { +func (c *PackageCataloger) fetchTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) { if entry == nil { - // TODO log.Warnf("missing python package top_level.txt (package=!!)") return nil, nil, nil } diff --git a/syft/cataloger/python/package_cataloger_test.go b/syft/cataloger/python/package_cataloger_test.go index 2ad16da73..c94b7f564 100644 --- a/syft/cataloger/python/package_cataloger_test.go +++ b/syft/cataloger/python/package_cataloger_test.go @@ -8,12 +8,15 @@ import ( "strings" "testing" + "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/pkg" "github.com/go-test/deep" ) +// TODO: make this generic (based on maps of source.FileData) and make a generic mock to move to the source pkg type pythonTestResolverMock struct { metadataReader io.Reader recordReader io.Reader @@ -68,21 +71,21 @@ func newTestResolver(metaPath, recordPath, topPath string) *pythonTestResolverMo } } -func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (string, error) { +func (r *pythonTestResolverMock) FileContentsByLocation(location source.Location) (string, error) { switch { - case r.topLevelRef != nil && ref.Path == r.topLevelRef.Path: + case r.topLevelRef != nil && location.Path == r.topLevelRef.Path: b, err := ioutil.ReadAll(r.topLevelReader) if err != nil { return "", err } return string(b), nil - case ref.Path == r.metadataRef.Path: + case location.Path == r.metadataRef.Path: b, err := ioutil.ReadAll(r.metadataReader) if err != nil { return "", err } return string(b), nil - case 
ref.Path == r.recordRef.Path: + case location.Path == r.recordRef.Path: b, err := ioutil.ReadAll(r.recordReader) if err != nil { return "", err @@ -92,16 +95,36 @@ func (r *pythonTestResolverMock) FileContentsByLocation(ref source.Location) (st return "", fmt.Errorf("invalid value given") } -func (r *pythonTestResolverMock) MultipleFileContentsByLocation(_ []source.Location) (map[source.Location]string, error) { - return nil, fmt.Errorf("not implemented") +func (r *pythonTestResolverMock) MultipleFileContentsByLocation(locations []source.Location) (map[source.Location]string, error) { + var results = make(map[source.Location]string) + var err error + for _, l := range locations { + results[l], err = r.FileContentsByLocation(l) + if err != nil { + return nil, err + } + } + + return results, nil } func (r *pythonTestResolverMock) FilesByPath(_ ...string) ([]source.Location, error) { return nil, fmt.Errorf("not implemented") } -func (r *pythonTestResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) { - return nil, fmt.Errorf("not implemented") +func (r *pythonTestResolverMock) FilesByGlob(patterns ...string) ([]source.Location, error) { + var results []source.Location + for _, pattern := range patterns { + for _, l := range []*source.Location{r.topLevelRef, r.metadataRef, r.recordRef} { + if l == nil { + continue + } + if file.GlobMatch(pattern, l.Path) { + results = append(results, *l) + } + } + } + return results, nil } func (r *pythonTestResolverMock) RelativeFileByPath(_ source.Location, path string) *source.Location { switch { @@ -224,14 +247,16 @@ func TestPythonPackageWheelCataloger(t *testing.T) { } // end patching expected values with runtime data... 
- pyPkgCataloger := NewPythonPackageCataloger() - - actual, err := pyPkgCataloger.catalogEggOrWheel(resolver, *resolver.metadataRef) + actual, err := NewPythonPackageCataloger().Catalog(resolver) if err != nil { t.Fatalf("failed to catalog python package: %+v", err) } - for _, d := range deep.Equal(actual, &test.ExpectedPackage) { + if len(actual) != 1 { + t.Fatalf("unexpected length: %d", len(actual)) + } + + for _, d := range deep.Equal(actual[0], test.ExpectedPackage) { t.Errorf("diff: %+v", d) } }) diff --git a/syft/source/content_requester.go b/syft/source/content_requester.go new file mode 100644 index 000000000..ea7b7b5a8 --- /dev/null +++ b/syft/source/content_requester.go @@ -0,0 +1,48 @@ +package source + +import "sync" + +type ContentRequester struct { + request map[Location][]*FileData + lock sync.Mutex +} + +func NewContentRequester(data ...*FileData) *ContentRequester { + requester := &ContentRequester{ + request: make(map[Location][]*FileData), + } + for _, d := range data { + requester.Add(d) + } + return requester +} + +func (b *ContentRequester) Add(data *FileData) { + b.lock.Lock() + defer b.lock.Unlock() + b.request[data.Location] = append(b.request[data.Location], data) +} + +func (b *ContentRequester) Execute(resolver ContentResolver) error { + b.lock.Lock() + defer b.lock.Unlock() + + var locations = make([]Location, len(b.request)) + idx := 0 + for l := range b.request { + locations[idx] = l + idx++ + } + + response, err := resolver.MultipleFileContentsByLocation(locations) + if err != nil { + return err + } + + for l, contents := range response { + for i := range b.request[l] { + b.request[l][i].Contents = contents + } + } + return nil +} diff --git a/syft/source/file_data.go b/syft/source/file_data.go new file mode 100644 index 000000000..843acc4a5 --- /dev/null +++ b/syft/source/file_data.go @@ -0,0 +1,6 @@ +package source + +type FileData struct { + Location Location + Contents string +} From 45fed7c69b7d4c73b9b23d394990ff455b3c3801 Mon 
Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 14 Dec 2020 11:25:27 -0500 Subject: [PATCH 4/5] break out packageEntry into a separate file Signed-off-by: Alex Goodman --- syft/cataloger/python/package_cataloger.go | 72 ++++++---------------- syft/cataloger/python/package_entry.go | 49 +++++++++++++++ 2 files changed, 68 insertions(+), 53 deletions(-) create mode 100644 syft/cataloger/python/package_entry.go diff --git a/syft/cataloger/python/package_cataloger.go b/syft/cataloger/python/package_cataloger.go index 247e305e3..885d19dca 100644 --- a/syft/cataloger/python/package_cataloger.go +++ b/syft/cataloger/python/package_cataloger.go @@ -3,11 +3,8 @@ package python import ( "bufio" "fmt" - "path/filepath" "strings" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" @@ -18,12 +15,6 @@ const ( wheelMetadataGlob = "**/*dist-info/METADATA" ) -type pythonPackageData struct { - Metadata source.FileData - FileRecord *source.FileData - TopPackage *source.FileData -} - type PackageCataloger struct{} // NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. @@ -38,7 +29,7 @@ func (c *PackageCataloger) Name() string { // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing python egg and wheel installations. func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { - entries, err := c.getPythonPackageEntries(resolver) + entries, err := c.getPackageEntries(resolver) if err != nil { return nil, err } @@ -57,7 +48,8 @@ func (c *PackageCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, err return packages, nil } -func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([]*pythonPackageData, error) { +// getPackageEntries fetches the contents for all python packages within the given resolver. 
+func (c *PackageCataloger) getPackageEntries(resolver source.Resolver) ([]*packageEntry, error) { var metadataLocations []source.Location // find all primary record paths @@ -68,47 +60,22 @@ func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([] metadataLocations = append(metadataLocations, matches...) // for every primary record path, craft all secondary record paths and build a request object to gather all file contents for each record - var requester = source.NewContentRequester() - var entries = make([]*pythonPackageData, len(metadataLocations)) + requester := source.NewContentRequester() + entries := make([]*packageEntry, len(metadataLocations)) for i, metadataLocation := range metadataLocations { - // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory - // or for an image... for an image the METADATA file may be present within multiple layers, so it is important - // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). The same is true with - // the top_level.txt file. 
- - // lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) - recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD") - recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath) - - // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages - parentDir := filepath.Dir(metadataLocation.Path) - topLevelPath := filepath.Join(parentDir, "top_level.txt") - topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath) - - // build an entry that will later be populated with contents when the request is executed - entry := &pythonPackageData{ - Metadata: source.FileData{ - Location: metadataLocation, - }, - } + // build the entry to process (holding only path information) + entry := newPackageEntry(resolver, metadataLocation) + // populate the data onto the requester object requester.Add(&entry.Metadata) - - if recordLocation != nil { - entry.FileRecord = &source.FileData{ - Location: *recordLocation, - } + if entry.FileRecord != nil { requester.Add(entry.FileRecord) } - - if topLevelLocation != nil { - entry.TopPackage = &source.FileData{ - Location: *topLevelLocation, - } + if entry.TopPackage != nil { requester.Add(entry.TopPackage) } - // keep the entry for processing later + // keep track of the entry for later package processing entries[i] = entry } @@ -117,7 +84,7 @@ func (c *PackageCataloger) getPythonPackageEntries(resolver source.Resolver) ([] } // catalogEggOrWheel takes the primary metadata file reference and returns the python package it represents. 
-func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Package, error) { +func (c *PackageCataloger) catalogEggOrWheel(entry *packageEntry) (*pkg.Package, error) { metadata, sources, err := c.assembleEggOrWheelMetadata(entry) if err != nil { return nil, err @@ -142,7 +109,7 @@ func (c *PackageCataloger) catalogEggOrWheel(entry *pythonPackageData) (*pkg.Pac } // assembleEggOrWheelMetadata discovers and accumulates python package metadata from multiple file sources and returns a single metadata object as well as a list of files where the metadata was derived from. -func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) (*pkg.PythonPackageMetadata, []source.Location, error) { +func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *packageEntry) (*pkg.PythonPackageMetadata, []source.Location, error) { var sources = []source.Location{entry.Metadata.Location} metadata, err := parseWheelOrEggMetadata(entry.Metadata.Location.Path, strings.NewReader(entry.Metadata.Contents)) @@ -151,7 +118,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) } // attach any python files found for the given wheel/egg installation - r, s, err := c.fetchRecordFiles(entry.FileRecord) + r, s, err := c.processRecordFiles(entry.FileRecord) if err != nil { return nil, nil, err } @@ -159,7 +126,7 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) metadata.Files = r // attach any top-level package names found for the given wheel/egg installation - p, s, err := c.fetchTopLevelPackages(entry.TopPackage) + p, s, err := c.processTopLevelPackages(entry.TopPackage) if err != nil { return nil, nil, err } @@ -169,8 +136,8 @@ func (c *PackageCataloger) assembleEggOrWheelMetadata(entry *pythonPackageData) return &metadata, sources, nil } -// fetchRecordFiles finds a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. 
-func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) { +// processRecordFiles takes a corresponding RECORD file for the given python package metadata file and returns the set of file records contained. +func (c *PackageCataloger) processRecordFiles(entry *source.FileData) (files []pkg.PythonFileRecord, sources []source.Location, err error) { // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory // or for an image... for an image the METADATA file may be present within multiple layers, so it is important // to reconcile the RECORD path to the same layer (or the next adjacent lower layer). @@ -189,10 +156,9 @@ func (c *PackageCataloger) fetchRecordFiles(entry *source.FileData) (files []pkg return files, sources, nil } -// fetchTopLevelPackages finds a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. -func (c *PackageCataloger) fetchTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) { +// processTopLevelPackages takes a corresponding top_level.txt file for the given python package metadata file and returns the set of package names contained. 
+func (c *PackageCataloger) processTopLevelPackages(entry *source.FileData) (pkgs []string, sources []source.Location, err error) { if entry == nil { - log.Warnf("missing python package top_level.txt (package=!!)") return nil, nil, nil } diff --git a/syft/cataloger/python/package_entry.go b/syft/cataloger/python/package_entry.go new file mode 100644 index 000000000..66e2e906b --- /dev/null +++ b/syft/cataloger/python/package_entry.go @@ -0,0 +1,49 @@ +package python + +import ( + "path/filepath" + + "github.com/anchore/syft/syft/source" +) + +type packageEntry struct { + Metadata source.FileData + FileRecord *source.FileData + TopPackage *source.FileData +} + +// newPackageEntry returns a new packageEntry to be processed relative to what information is available in the given FileResolver. +func newPackageEntry(resolver source.FileResolver, metadataLocation source.Location) *packageEntry { + // we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory + // or for an image... for an image the METADATA file may be present within multiple layers, so it is important + // to reconcile the RECORD path to the same layer (or a lower layer). The same is true with the top_level.txt file. 
+ + // let's find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure) + recordPath := filepath.Join(filepath.Dir(metadataLocation.Path), "RECORD") + recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath) + + // a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages + parentDir := filepath.Dir(metadataLocation.Path) + topLevelPath := filepath.Join(parentDir, "top_level.txt") + topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath) + + // build an entry that will later be populated with contents when the request is executed + entry := &packageEntry{ + Metadata: source.FileData{ + Location: metadataLocation, + }, + } + + if recordLocation != nil { + entry.FileRecord = &source.FileData{ + Location: *recordLocation, + } + } + + if topLevelLocation != nil { + entry.TopPackage = &source.FileData{ + Location: *topLevelLocation, + } + } + return entry +} From d94d7a7d807ac83a03c8b950db974c0857225e21 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Mon, 14 Dec 2020 21:03:49 -0500 Subject: [PATCH 5/5] add tests for content requester object Signed-off-by: Alex Goodman --- syft/source/content_requester.go | 30 +++++--- syft/source/content_requester_test.go | 69 +++++++++++++++++++ syft/source/directory_resolver_test.go | 2 +- .../test-fixtures/image-simple/Dockerfile | 6 ++ .../test-fixtures/image-simple/file-1.txt | 1 + .../test-fixtures/image-simple/file-2.txt | 1 + .../target/really/nested/file-3.txt | 2 + 7 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 syft/source/content_requester_test.go create mode 100644 syft/source/test-fixtures/image-simple/Dockerfile create mode 100644 syft/source/test-fixtures/image-simple/file-1.txt create mode 100644 syft/source/test-fixtures/image-simple/file-2.txt create mode 100644 syft/source/test-fixtures/image-simple/target/really/nested/file-3.txt diff
--git a/syft/source/content_requester.go b/syft/source/content_requester.go index ea7b7b5a8..ef99513c0 100644 --- a/syft/source/content_requester.go +++ b/syft/source/content_requester.go @@ -2,11 +2,14 @@ package source import "sync" +// ContentRequester is an object tailored for taking source.Location objects whose file contents will be resolved +// upon invoking Execute(). type ContentRequester struct { request map[Location][]*FileData lock sync.Mutex } +// NewContentRequester creates a new ContentRequester object with the given initial request data. func NewContentRequester(data ...*FileData) *ContentRequester { requester := &ContentRequester{ request: make(map[Location][]*FileData), @@ -17,19 +20,24 @@ func NewContentRequester(data ...*FileData) *ContentRequester { return requester } -func (b *ContentRequester) Add(data *FileData) { - b.lock.Lock() - defer b.lock.Unlock() - b.request[data.Location] = append(b.request[data.Location], data) +// Add appends a new single FileData containing a source.Location to later have the contents fetched and stored within +// the given FileData object. +func (r *ContentRequester) Add(data *FileData) { + r.lock.Lock() + defer r.lock.Unlock() + + r.request[data.Location] = append(r.request[data.Location], data) } -func (b *ContentRequester) Execute(resolver ContentResolver) error { - b.lock.Lock() - defer b.lock.Unlock() +// Execute takes the previously provided source.Locations and resolves the file contents, storing the results within +// the previously provided FileData objects.
+func (r *ContentRequester) Execute(resolver ContentResolver) error { + r.lock.Lock() + defer r.lock.Unlock() - var locations = make([]Location, len(b.request)) + var locations = make([]Location, len(r.request)) idx := 0 - for l := range b.request { + for l := range r.request { locations[idx] = l idx++ } @@ -40,8 +48,8 @@ func (b *ContentRequester) Execute(resolver ContentResolver) error { } for l, contents := range response { - for i := range b.request[l] { - b.request[l][i].Contents = contents + for i := range r.request[l] { + r.request[l][i].Contents = contents } } return nil diff --git a/syft/source/content_requester_test.go b/syft/source/content_requester_test.go new file mode 100644 index 000000000..1ca914703 --- /dev/null +++ b/syft/source/content_requester_test.go @@ -0,0 +1,69 @@ +package source + +import ( + "testing" + + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/sergi/go-diff/diffmatchpatch" +) + +func TestContentRequester(t *testing.T) { + tests := []struct { + fixture string + expectedContents map[string]string + }{ + { + fixture: "image-simple", + expectedContents: map[string]string{ + "/somefile-1.txt": "this file has contents", + "/somefile-2.txt": "file-2 contents!", + "/really/nested/file-3.txt": "another file!\nwith lines...", + }, + }, + } + + for _, test := range tests { + t.Run(test.fixture, func(t *testing.T) { + img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-simple") + defer cleanup() + + resolver, err := NewAllLayersResolver(img) + if err != nil { + t.Fatalf("could not create resolver: %+v", err) + } + + var data []*FileData + for path := range test.expectedContents { + + locations, err := resolver.FilesByPath(path) + if err != nil { + t.Fatalf("could not build request: %+v", err) + } + if len(locations) != 1 { + t.Fatalf("bad resolver paths: %+v", locations) + } + + data = append(data, &FileData{ + Location: locations[0], + }) + } + + if err := NewContentRequester(data...).Execute(resolver); err != 
nil { + t.Fatalf("could not execute request: %+v", err) + } + + for _, entry := range data { + if expected, ok := test.expectedContents[entry.Location.Path]; ok { + for expected != entry.Contents { + t.Errorf("mismatched contents for %q", entry.Location.Path) + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(expected, entry.Contents, true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) + } + continue + } + t.Errorf("could not find %q", entry.Location.Path) + } + }) + } +} diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index f2bcad89f..fdc82971f 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -180,7 +180,7 @@ func TestDirectoryResolver_FilesByGlobMultiple(t *testing.T) { func TestDirectoryResolver_FilesByGlobRecursive(t *testing.T) { t.Run("finds multiple matching files", func(t *testing.T) { - resolver := DirectoryResolver{"test-fixtures"} + resolver := DirectoryResolver{"test-fixtures/image-symlinks"} refs, err := resolver.FilesByGlob("**/*.txt") if err != nil { diff --git a/syft/source/test-fixtures/image-simple/Dockerfile b/syft/source/test-fixtures/image-simple/Dockerfile new file mode 100644 index 000000000..62fb151e4 --- /dev/null +++ b/syft/source/test-fixtures/image-simple/Dockerfile @@ -0,0 +1,6 @@ +# Note: changes to this file will result in updating several test values. Consider making a new image fixture instead of editing this one. 
+FROM scratch +ADD file-1.txt /somefile-1.txt +ADD file-2.txt /somefile-2.txt +# note: adding a directory will behave differently on docker engine v18 vs v19 +ADD target / diff --git a/syft/source/test-fixtures/image-simple/file-1.txt b/syft/source/test-fixtures/image-simple/file-1.txt new file mode 100644 index 000000000..985d3408e --- /dev/null +++ b/syft/source/test-fixtures/image-simple/file-1.txt @@ -0,0 +1 @@ +this file has contents \ No newline at end of file diff --git a/syft/source/test-fixtures/image-simple/file-2.txt b/syft/source/test-fixtures/image-simple/file-2.txt new file mode 100644 index 000000000..396d08bbc --- /dev/null +++ b/syft/source/test-fixtures/image-simple/file-2.txt @@ -0,0 +1 @@ +file-2 contents! \ No newline at end of file diff --git a/syft/source/test-fixtures/image-simple/target/really/nested/file-3.txt b/syft/source/test-fixtures/image-simple/target/really/nested/file-3.txt new file mode 100644 index 000000000..f85472c93 --- /dev/null +++ b/syft/source/test-fixtures/image-simple/target/really/nested/file-3.txt @@ -0,0 +1,2 @@ +another file! +with lines... \ No newline at end of file