diff --git a/README.md b/README.md index a627a8a52..266a88693 100644 --- a/README.md +++ b/README.md @@ -244,6 +244,18 @@ exclude: # cataloging packages is exposed through the packages and power-user subcommands package: + + # search within archives that do contain a file index to search against (zip) + # note: for now this only applies to the java package cataloger + # SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES env var + search-indexed-archives: true + + # search within archives that do not contain a file index to search against (tar, tar.gz, tar.bz2, etc) + # note: enabling this may result in a performance impact since all discovered compressed tars will be decompressed + # note: for now this only applies to the java package cataloger + # SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES env var + search-unindexed-archives: false + cataloger: # enable/disable cataloging of packages # SYFT_PACKAGE_CATALOGER_ENABLED env var diff --git a/cmd/packages.go b/cmd/packages.go index f68f131ee..ca4b29f44 100644 --- a/cmd/packages.go +++ b/cmd/packages.go @@ -17,6 +17,7 @@ import ( "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/event" "github.com/anchore/syft/syft/format" + "github.com/anchore/syft/syft/pkg/cataloger" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" "github.com/pkg/profile" @@ -98,7 +99,7 @@ func setPackageFlags(flags *pflag.FlagSet) { // Formatting & Input options ////////////////////////////////////////////// flags.StringP( - "scope", "s", source.SquashedScope.String(), + "scope", "s", cataloger.DefaultSearchConfig().Scope.String(), fmt.Sprintf("selection of layers to catalog, options=%v", source.AllScopes)) flags.StringP( diff --git a/cmd/tasks.go b/cmd/tasks.go index 624eb78a3..ce6bf064d 100644 --- a/cmd/tasks.go +++ b/cmd/tasks.go @@ -45,7 +45,7 @@ func generateCatalogPackagesTask() (task, error) { } task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - packageCatalog, 
relationships, theDistro, err := syft.CatalogPackages(src, appConfig.Package.Cataloger.ScopeOpt) + packageCatalog, relationships, theDistro, err := syft.CatalogPackages(src, appConfig.Package.ToConfig()) if err != nil { return nil, err } diff --git a/go.mod b/go.mod index bdb41703b..cb80d81df 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( // go: warning: github.com/andybalholm/brotli@v1.0.1: retracted by module author: occasional panics and data corruption github.com/andybalholm/brotli v1.0.4 // indirect github.com/antihax/optional v1.0.0 - github.com/bmatcuk/doublestar/v2 v2.0.4 + github.com/bmatcuk/doublestar/v4 v4.0.2 github.com/docker/docker v20.10.11+incompatible github.com/dustin/go-humanize v1.0.0 github.com/facebookincubator/nvdtools v0.1.4 diff --git a/go.sum b/go.sum index 1698f8483..6470dce87 100644 --- a/go.sum +++ b/go.sum @@ -135,8 +135,6 @@ github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/bmatcuk/doublestar/v2 v2.0.4 h1:6I6oUiT/sU27eE2OFcWqBhL1SwjyvQuOssxT4a1yidI= -github.com/bmatcuk/doublestar/v2 v2.0.4/go.mod h1:QMmcs3H2AUQICWhfzLXz+IYln8lRQmTZRptLie8RgRw= github.com/bmatcuk/doublestar/v4 v4.0.2 h1:X0krlUVAVmtr2cRoTqR8aDMrDqnB36ht8wpWTiQ3jsA= github.com/bmatcuk/doublestar/v4 v4.0.2/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= diff --git a/internal/config/application.go b/internal/config/application.go index 56b2654b0..54264bef9 100644 --- a/internal/config/application.go +++ b/internal/config/application.go @@ -38,7 +38,7 @@ type Application struct { CliOptions CliOnlyOptions 
`yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config) Dev development `yaml:"dev" json:"dev" mapstructure:"dev"` Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options - Package packages `yaml:"package" json:"package" mapstructure:"package"` + Package pkg `yaml:"package" json:"package" mapstructure:"package"` FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"` FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` diff --git a/internal/config/cataloger_options.go b/internal/config/cataloger_options.go index 2b78bb0c5..1fb2992b4 100644 --- a/internal/config/cataloger_options.go +++ b/internal/config/cataloger_options.go @@ -3,6 +3,8 @@ package config import ( "fmt" + "github.com/spf13/viper" + "github.com/anchore/syft/syft/source" ) @@ -12,6 +14,10 @@ type catalogerOptions struct { ScopeOpt source.Scope `yaml:"-" json:"-"` } +func (cfg catalogerOptions) loadDefaultValues(v *viper.Viper) { + v.SetDefault("package.cataloger.enabled", true) +} + func (cfg *catalogerOptions) parseConfigValues() error { scopeOption := source.ParseScope(cfg.Scope) if scopeOption == source.UnknownScope { diff --git a/internal/config/packages.go b/internal/config/packages.go deleted file mode 100644 index be306d9e5..000000000 --- a/internal/config/packages.go +++ /dev/null @@ -1,15 +0,0 @@ -package config - -import "github.com/spf13/viper" - -type packages struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` -} - -func (cfg packages) loadDefaultValues(v *viper.Viper) { - v.SetDefault("package.cataloger.enabled", true) -} - -func (cfg *packages) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() -} diff --git a/internal/config/pkg.go 
b/internal/config/pkg.go new file mode 100644 index 000000000..2e695a995 --- /dev/null +++ b/internal/config/pkg.go @@ -0,0 +1,33 @@ +package config + +import ( + "github.com/anchore/syft/syft/pkg/cataloger" + "github.com/spf13/viper" +) + +type pkg struct { + Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` + SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"` + SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"` +} + +func (cfg pkg) loadDefaultValues(v *viper.Viper) { + cfg.Cataloger.loadDefaultValues(v) + c := cataloger.DefaultSearchConfig() + v.SetDefault("package.search-unindexed-archives", c.IncludeUnindexedArchives) + v.SetDefault("package.search-indexed-archives", c.IncludeIndexedArchives) +} + +func (cfg *pkg) parseConfigValues() error { + return cfg.Cataloger.parseConfigValues() +} + +func (cfg pkg) ToConfig() cataloger.Config { + return cataloger.Config{ + Search: cataloger.SearchConfig{ + IncludeIndexedArchives: cfg.SearchIndexedArchives, + IncludeUnindexedArchives: cfg.SearchUnindexedArchives, + Scope: cfg.Cataloger.ScopeOpt, + }, + } +} diff --git a/internal/file/copy.go b/internal/file/copy.go new file mode 100644 index 000000000..d569dcc4c --- /dev/null +++ b/internal/file/copy.go @@ -0,0 +1,19 @@ +package file + +import ( + "errors" + "fmt" + "io" +) + +const perFileReadLimit = 2 * GB + +// safeCopy limits the copy from the reader. This is useful when extracting files from archives to +// protect against decompression bomb attacks. 
+func safeCopy(writer io.Writer, reader io.Reader) error {
+	numBytes, err := io.Copy(writer, io.LimitReader(reader, perFileReadLimit))
+	if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
+		return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
+	}
+	return err
+}
diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go
new file mode 100644
index 000000000..4280935c6
--- /dev/null
+++ b/internal/file/tar_file_traversal.go
@@ -0,0 +1,64 @@
+package file
+
+import (
+	"fmt"
+	"io/ioutil"
+	"path/filepath"
+
+	"github.com/bmatcuk/doublestar/v4"
+	"github.com/mholt/archiver/v3"
+)
+
+// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
+func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
+	results := make(map[string]Opener)
+
+	// don't allow for full traversal, only select traversal from given paths
+	if len(globs) == 0 {
+		return results, nil
+	}
+
+	visitor := func(file archiver.File) error {
+		defer file.Close()
+
+		// ignore directories
+		if file.FileInfo.IsDir() {
+			return nil
+		}
+
+		// ignore any filename that doesn't match the given globs...
+		if !matchesAnyGlob(file.Name(), globs...) {
+			return nil
+		}
+
+		// we have a file we want to extract....
+		tempfilePrefix := filepath.Base(filepath.Clean(file.Name())) + "-"
+		tempFile, err := ioutil.TempFile(dir, tempfilePrefix)
+		if err != nil {
+			return fmt.Errorf("unable to create temp file: %w", err)
+		}
+		// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
+		// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
+		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
+ defer tempFile.Close() + + if err := safeCopy(tempFile, file.ReadCloser); err != nil { + return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) + } + + results[file.Name()] = Opener{path: tempFile.Name()} + + return nil + } + + return results, archiver.Walk(archivePath, visitor) +} + +func matchesAnyGlob(name string, globs ...string) bool { + for _, glob := range globs { + if matches, err := doublestar.PathMatch(glob, name); err == nil && matches { + return true + } + } + return false +} diff --git a/internal/file/zip_file_traversal.go b/internal/file/zip_file_traversal.go index f9bdae7e3..e71502e6e 100644 --- a/internal/file/zip_file_traversal.go +++ b/internal/file/zip_file_traversal.go @@ -3,9 +3,7 @@ package file import ( "archive/zip" "bytes" - "errors" "fmt" - "io" "io/ioutil" "os" "path/filepath" @@ -22,8 +20,6 @@ const ( GB ) -const perFileReadLimit = 2 * GB - type errZipSlipDetected struct { Prefix string JoinArgs []string @@ -110,21 +106,10 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m return fmt.Errorf("unable to extract directories, only files: %s", file.Name) } - // limit the zip reader on each file read to prevent decompression bomb attacks - numBytes, err := io.Copy(tempFile, io.LimitReader(zippedFile, perFileReadLimit)) - if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) { - return fmt.Errorf("zip read limit hit (potential decompression bomb attack)") - } - if err != nil { + if err := safeCopy(tempFile, zippedFile); err != nil { return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) } - // the file pointer is at the end due to the copy operation, reset back to the beginning - _, err = tempFile.Seek(0, io.SeekStart) - if err != nil { - return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err) - } - results[file.Name] = Opener{path: tempFile.Name()} return nil @@ -153,13 +138,7 @@ func 
ContentsFromZip(archivePath string, paths ...string) (map[string]string, er } var buffer bytes.Buffer - - // limit the zip reader on each file read to prevent decompression bomb attacks - numBytes, err := io.Copy(&buffer, io.LimitReader(zippedFile, perFileReadLimit)) - if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) { - return fmt.Errorf("zip read limit hit (potential decompression bomb attack)") - } - if err != nil { + if err := safeCopy(&buffer, zippedFile); err != nil { return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) } @@ -228,12 +207,7 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) } - // limit the zip reader on each file read to prevent decompression bomb attacks - numBytes, err := io.Copy(outputFile, io.LimitReader(zippedFile, perFileReadLimit)) - if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) { - return fmt.Errorf("zip read limit hit (potential decompression bomb attack)") - } - if err != nil { + if err := safeCopy(outputFile, zippedFile); err != nil { return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) } diff --git a/syft/file/generate_search_patterns.go b/syft/file/generate_search_patterns.go index 1673f464c..5e2c074dc 100644 --- a/syft/file/generate_search_patterns.go +++ b/syft/file/generate_search_patterns.go @@ -4,7 +4,7 @@ import ( "fmt" "regexp" - "github.com/bmatcuk/doublestar/v2" + "github.com/bmatcuk/doublestar/v4" "github.com/hashicorp/go-multierror" ) diff --git a/syft/lib.go b/syft/lib.go index 376315335..244312b03 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -1,5 +1,5 @@ /* -A "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library. 
+Package syft is a "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library. Here is what the main execution path for syft does: @@ -34,8 +34,8 @@ import ( // CatalogPackages takes an inventory of packages from the given image from a particular perspective // (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux // distribution, and the source object used to wrap the data source. -func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, error) { - resolver, err := src.FileResolver(scope) +func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, error) { + resolver, err := src.FileResolver(cfg.Search.Scope) if err != nil { return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) } @@ -53,13 +53,13 @@ func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, []ar switch src.Metadata.Scheme { case source.ImageScheme: log.Info("cataloging image") - catalogers = cataloger.ImageCatalogers() + catalogers = cataloger.ImageCatalogers(cfg) case source.FileScheme: log.Info("cataloging file") - catalogers = cataloger.AllCatalogers() + catalogers = cataloger.AllCatalogers(cfg) case source.DirectoryScheme: log.Info("cataloging directory") - catalogers = cataloger.DirectoryCatalogers() + catalogers = cataloger.DirectoryCatalogers(cfg) default: return nil, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme) } diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go index bac9fa5db..f44efeb4b 100644 --- a/syft/pkg/cataloger/cataloger.go +++ b/syft/pkg/cataloger/cataloger.go @@ -32,7 +32,7 @@ type Cataloger interface { } // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of 
packages. -func ImageCatalogers() []Cataloger { +func ImageCatalogers(cfg Config) []Cataloger { return []Cataloger{ ruby.NewGemSpecCataloger(), python.NewPythonPackageCataloger(), @@ -40,14 +40,14 @@ func ImageCatalogers() []Cataloger { javascript.NewJavascriptPackageCataloger(), deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), - java.NewJavaCataloger(), + java.NewJavaCataloger(cfg.Java()), apkdb.NewApkdbCataloger(), golang.NewGoModuleBinaryCataloger(), } } // DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations) -func DirectoryCatalogers() []Cataloger { +func DirectoryCatalogers(cfg Config) []Cataloger { return []Cataloger{ ruby.NewGemFileLockCataloger(), python.NewPythonIndexCataloger(), @@ -56,7 +56,7 @@ func DirectoryCatalogers() []Cataloger { javascript.NewJavascriptLockCataloger(), deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), - java.NewJavaCataloger(), + java.NewJavaCataloger(cfg.Java()), apkdb.NewApkdbCataloger(), golang.NewGoModuleBinaryCataloger(), golang.NewGoModFileCataloger(), @@ -65,7 +65,7 @@ func DirectoryCatalogers() []Cataloger { } // AllCatalogers returns all implemented catalogers -func AllCatalogers() []Cataloger { +func AllCatalogers(cfg Config) []Cataloger { return []Cataloger{ ruby.NewGemFileLockCataloger(), ruby.NewGemSpecCataloger(), @@ -75,7 +75,7 @@ func AllCatalogers() []Cataloger { javascript.NewJavascriptPackageCataloger(), deb.NewDpkgdbCataloger(), rpmdb.NewRpmdbCataloger(), - java.NewJavaCataloger(), + java.NewJavaCataloger(cfg.Java()), apkdb.NewApkdbCataloger(), golang.NewGoModuleBinaryCataloger(), golang.NewGoModFileCataloger(), diff --git a/syft/pkg/cataloger/config.go b/syft/pkg/cataloger/config.go new file mode 100644 index 000000000..4e82957c0 --- /dev/null +++ b/syft/pkg/cataloger/config.go @@ -0,0 +1,22 @@ +package cataloger + +import ( + "github.com/anchore/syft/syft/pkg/cataloger/java" +) + +type Config struct { + 
Search SearchConfig +} + +func DefaultConfig() Config { + return Config{ + Search: DefaultSearchConfig(), + } +} + +func (c Config) Java() java.Config { + return java.Config{ + SearchUnindexedArchives: c.Search.IncludeUnindexedArchives, + SearchIndexedArchives: c.Search.IncludeIndexedArchives, + } +} diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 7cb013a47..e2f859b17 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -64,7 +64,11 @@ func uniquePkgKey(p *pkg.Package) string { // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // and parse nested archives or ignore them. func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested bool) (*archiveParser, func(), error) { - contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader) + // fetch the last element of the virtual path + virtualElements := strings.Split(virtualPath, ":") + currentFilepath := virtualElements[len(virtualElements)-1] + + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(currentFilepath, reader) if err != nil { return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) } @@ -74,10 +78,6 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) } - // fetch the last element of the virtual path - virtualElements := strings.Split(virtualPath, ":") - currentFilepath := virtualElements[len(virtualElements)-1] - return &archiveParser{ fileManifest: fileManifest, virtualPath: virtualPath, @@ -198,34 +198,33 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([ return pkgs, nil } -// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and -// associating each discovered 
package to the given parent package. func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) { - var pkgs []*pkg.Package - var relationships []artifact.Relationship + // we know that all java archives are zip formatted files, so we can use the shared zip helper + return discoverPkgsFromZip(j.virtualPath, j.archivePath, j.contentPath, j.fileManifest, parentPkg) +} +// discoverPkgsFromZip finds Java archives within Java archives, returning all listed Java packages found and +// associating each discovered package to the given parent package. +func discoverPkgsFromZip(virtualPath, archivePath, contentPath string, fileManifest file.ZipFileManifest, parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) { // search and parse pom.properties files & fetch the contents - openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...) + openers, err := file.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(archiveFormatGlobs...)...) if err != nil { return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) } - // discover nested artifacts - for archivePath, archiveOpener := range openers { - archiveReadCloser, err := archiveOpener.Open() + return discoverPkgsFromOpeners(virtualPath, openers, parentPkg) +} + +// discoverPkgsFromOpeners finds Java archives within the given files and associates them with the given parent package. 
+func discoverPkgsFromOpeners(virtualPath string, openers map[string]file.Opener, parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) { + var pkgs []*pkg.Package + var relationships []artifact.Relationship + + for pathWithinArchive, archiveOpener := range openers { + nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(virtualPath, pathWithinArchive, archiveOpener) if err != nil { - return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) - } - nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath) - nestedPkgs, nestedRelationships, err := parseJavaArchive(nestedPath, archiveReadCloser) - if err != nil { - if closeErr := archiveReadCloser.Close(); closeErr != nil { - log.Warnf("unable to close archived file from tempdir: %+v", closeErr) - } - return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err) - } - if err = archiveReadCloser.Close(); err != nil { - return nil, nil, fmt.Errorf("unable to close archived file from tempdir: %w", err) + log.Warnf("unable to discover java packages from opener (%s): %+v", virtualPath, err) + continue } // attach the parent package to all discovered packages that are not already associated with a java archive @@ -245,6 +244,27 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ( return pkgs, relationships, nil } +// discoverPkgsFromOpener finds Java archives within the given file. 
+func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener file.Opener) ([]*pkg.Package, []artifact.Relationship, error) { + archiveReadCloser, err := archiveOpener.Open() + if err != nil { + return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) + } + defer func() { + if closeErr := archiveReadCloser.Close(); closeErr != nil { + log.Warnf("unable to close archived file from tempdir: %+v", closeErr) + } + }() + + nestedPath := fmt.Sprintf("%s:%s", virtualPath, pathWithinArchive) + nestedPkgs, nestedRelationships, err := parseJavaArchive(nestedPath, archiveReadCloser) + if err != nil { + return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err) + } + + return nestedPkgs, nestedRelationships, nil +} + func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) { contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...) 
if err != nil { diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index fa9275db5..ac1b881c0 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -6,18 +6,17 @@ import ( "io" "os" "os/exec" + "path" "path/filepath" "strings" "syscall" "testing" - "github.com/stretchr/testify/assert" - "github.com/anchore/syft/internal" - "github.com/anchore/syft/syft/pkg" "github.com/go-test/deep" "github.com/gookit/color" + "github.com/stretchr/testify/assert" ) func generateJavaBuildFixture(t *testing.T, fixturePath string) { @@ -227,7 +226,7 @@ func TestParseJar(t *testing.T) { } for _, test := range tests { - t.Run(test.fixture, func(t *testing.T) { + t.Run(path.Base(test.fixture), func(t *testing.T) { generateJavaBuildFixture(t, test.fixture) diff --git a/syft/pkg/cataloger/java/cataloger.go b/syft/pkg/cataloger/java/cataloger.go index 036561bf8..3befe88b5 100644 --- a/syft/pkg/cataloger/java/cataloger.go +++ b/syft/pkg/cataloger/java/cataloger.go @@ -8,11 +8,27 @@ import ( ) // NewJavaCataloger returns a new Java archive cataloger object. 
-func NewJavaCataloger() *common.GenericCataloger { +func NewJavaCataloger(cfg Config) *common.GenericCataloger { globParsers := make(map[string]common.ParserFn) + + // java archive formats for _, pattern := range archiveFormatGlobs { globParsers[pattern] = parseJavaArchive } + if cfg.SearchIndexedArchives { + // java archives wrapped within zip files + for _, pattern := range genericZipGlobs { + globParsers[pattern] = parseZipWrappedJavaArchive + } + } + + if cfg.SearchUnindexedArchives { + // java archives wrapped within tar files + for _, pattern := range genericTarGlobs { + globParsers[pattern] = parseTarWrappedJavaArchive + } + } + return common.NewGenericCataloger(nil, globParsers, "java-cataloger") } diff --git a/syft/pkg/cataloger/java/config.go b/syft/pkg/cataloger/java/config.go new file mode 100644 index 000000000..84b940ac1 --- /dev/null +++ b/syft/pkg/cataloger/java/config.go @@ -0,0 +1,6 @@ +package java + +type Config struct { + SearchUnindexedArchives bool + SearchIndexedArchives bool +} diff --git a/syft/pkg/cataloger/java/save_archive_to_tmp.go b/syft/pkg/cataloger/java/save_archive_to_tmp.go index 69d786693..ff7c83a57 100644 --- a/syft/pkg/cataloger/java/save_archive_to_tmp.go +++ b/syft/pkg/cataloger/java/save_archive_to_tmp.go @@ -5,25 +5,27 @@ import ( "io" "io/ioutil" "os" + "path" "path/filepath" "github.com/anchore/syft/internal/log" ) -func saveArchiveToTmp(reader io.Reader) (string, string, func(), error) { - tempDir, err := ioutil.TempDir("", "syft-jar-contents-") +func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, string, func(), error) { + name := path.Base(archiveVirtualPath) + tempDir, err := ioutil.TempDir("", "syft-archive-contents-") if err != nil { - return "", "", func() {}, fmt.Errorf("unable to create tempdir for jar processing: %w", err) + return "", "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err) } cleanupFn := func() { err = os.RemoveAll(tempDir) if err != nil { 
- log.Errorf("unable to cleanup jar tempdir: %+v", err) + log.Errorf("unable to cleanup archive tempdir: %+v", err) } } - archivePath := filepath.Join(tempDir, "archive") + archivePath := filepath.Join(tempDir, "archive-"+name) contentDir := filepath.Join(tempDir, "contents") err = os.Mkdir(contentDir, 0755) diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go new file mode 100644 index 000000000..4d0a60420 --- /dev/null +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go @@ -0,0 +1,68 @@ +package java + +import ( + "fmt" + "io" + + "github.com/anchore/syft/internal/file" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/common" +) + +// integrity check +var _ common.ParserFn = parseTarWrappedJavaArchive + +var genericTarGlobs = []string{ + "**/*.tar", + // gzipped tar + "**/*.tar.gz", + "**/*.tgz", + // bzip2 + "**/*.tar.bz", + "**/*.tar.bz2", + "**/*.tbz", + "**/*.tbz2", + // brotli + "**/*.tar.br", + "**/*.tbr", + // lz4 + "**/*.tar.lz4", + "**/*.tlz4", + // sz + "**/*.tar.sz", + "**/*.tsz", + // xz + "**/*.tar.xz", + "**/*.txz", + // zst + "**/*.tar.zst", +} + +// TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246) + +// parseTarWrappedJavaArchive is a parser function for java archive contents contained within arbitrary tar files. +// note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is +// due to the fact that there is no central directory header (say as in zip), which means that in order to get +// a file listing within the archive you must decompress the entire archive and seek through all of the entries. 
+func parseTarWrappedJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(virtualPath, reader) + // note: even on error, we should always run cleanup functions + defer cleanupFn() + if err != nil { + return nil, nil, err + } + + // look for java archives within the tar archive + return discoverPkgsFromTar(virtualPath, archivePath, contentPath) +} + +func discoverPkgsFromTar(virtualPath, archivePath, contentPath string) ([]*pkg.Package, []artifact.Relationship, error) { + openers, err := file.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) + if err != nil { + return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) + } + + return discoverPkgsFromOpeners(virtualPath, openers, nil) +} diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go new file mode 100644 index 000000000..8a1f73162 --- /dev/null +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go @@ -0,0 +1,53 @@ +package java + +import ( + "os" + "path" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/stretchr/testify/assert" +) + +func Test_parseTarWrappedJavaArchive(t *testing.T) { + tests := []struct { + fixture string + expected []string + }{ + { + fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar", + expected: []string{ + "example-java-app-maven", + "joda-time", + }, + }, + { + fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar.gz", + expected: []string{ + "example-java-app-maven", + "joda-time", + }, + }, + } + for _, test := range tests { + t.Run(path.Base(test.fixture), func(t *testing.T) { + generateJavaBuildFixture(t, test.fixture) + + fixture, err := os.Open(test.fixture) + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } + + actualPkgs, _, err := 
parseTarWrappedJavaArchive(test.fixture, fixture) + require.NoError(t, err) + + var actualNames []string + for _, p := range actualPkgs { + actualNames = append(actualNames, p.Name) + } + + assert.ElementsMatch(t, test.expected, actualNames) + }) + } +} diff --git a/syft/pkg/cataloger/java/test-fixtures/java-builds/.gitignore b/syft/pkg/cataloger/java/test-fixtures/java-builds/.gitignore index b954422b0..1685225cc 100644 --- a/syft/pkg/cataloger/java/test-fixtures/java-builds/.gitignore +++ b/syft/pkg/cataloger/java/test-fixtures/java-builds/.gitignore @@ -1,4 +1,4 @@ -/packages/sb +/packages/* *.fingerprint # maven when running in a volume may spit out directories like this **/\?/ diff --git a/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile b/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile index 8c36af965..77141ad3e 100644 --- a/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile +++ b/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile @@ -4,14 +4,29 @@ ifndef PKGSDIR $(error PKGSDIR is not set) endif -all: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar +all: jars archives clean: clean-examples rm -f $(PKGSDIR)/* clean-examples: clean-gradle clean-maven clean-jenkins clean-nestedjar -.PHONY: maven gradle clean clean-gradle clean-maven clean-jenkins clean-examples clean-nestedjar +.PHONY: maven gradle clean clean-gradle clean-maven clean-jenkins clean-examples clean-nestedjar jars archives + +jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar + +archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz + +# jars within archives... 
+ +$(PKGSDIR)/example-java-app-maven-0.1.0.zip: $(PKGSDIR)/example-java-app-maven-0.1.0.jar + zip $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.jar + +$(PKGSDIR)/example-java-app-maven-0.1.0.tar: $(PKGSDIR)/example-java-app-maven-0.1.0.jar + tar -cvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.jar + +$(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar + tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar # Nested jar... diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go new file mode 100644 index 000000000..6e32ed428 --- /dev/null +++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go @@ -0,0 +1,43 @@ +package java + +import ( + "fmt" + "io" + + "github.com/anchore/syft/internal/file" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/common" +) + +// integrity check +var _ common.ParserFn = parseZipWrappedJavaArchive + +var genericZipGlobs = []string{ + "**/*.zip", +} + +// TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246) + +// parseZipWrappedJavaArchive is a parser function for java archive contents contained within arbitrary zip files. +func parseZipWrappedJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(virtualPath, reader) + // note: even on error, we should always run cleanup functions + defer cleanupFn() + if err != nil { + return nil, nil, err + } + + // we use our zip helper functions instead of that from the archiver package or the standard lib. Why? These helper + // functions support zips with shell scripts prepended to the file. 
Specifically, the helpers use the central
+	// directory record at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
+	// or archiver).
+	fileManifest, err := file.NewZipFileManifest(archivePath)
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
+	}
+
+	// look for java archives within the zip archive
+	return discoverPkgsFromZip(virtualPath, archivePath, contentPath, fileManifest, nil)
+}
diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go
new file mode 100644
index 000000000..5f062fd25
--- /dev/null
+++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go
@@ -0,0 +1,45 @@
+package java
+
+import (
+	"os"
+	"path"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func Test_parseZipWrappedJavaArchive(t *testing.T) {
+	tests := []struct {
+		fixture  string
+		expected []string
+	}{
+		{
+			fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.zip",
+			expected: []string{
+				"example-java-app-maven",
+				"joda-time",
+			},
+		},
+	}
+	for _, test := range tests {
+		t.Run(path.Base(test.fixture), func(t *testing.T) {
+			generateJavaBuildFixture(t, test.fixture)
+
+			fixture, err := os.Open(test.fixture)
+			if err != nil {
+				t.Fatalf("failed to open fixture: %+v", err)
+			}
+
+			actualPkgs, _, err := parseZipWrappedJavaArchive(test.fixture, fixture)
+			require.NoError(t, err)
+
+			var actualNames []string
+			for _, p := range actualPkgs {
+				actualNames = append(actualNames, p.Name)
+			}
+
+			assert.ElementsMatch(t, test.expected, actualNames)
+		})
+	}
+}
diff --git a/syft/pkg/cataloger/javascript/parse_package_json.go b/syft/pkg/cataloger/javascript/parse_package_json.go
index 9b0486d2c..64910616a 100644
--- a/syft/pkg/cataloger/javascript/parse_package_json.go
+++ b/syft/pkg/cataloger/javascript/parse_package_json.go
@@ -162,7 +162,7 @@ func (p 
PackageJSON) licensesFromJSON() ([]string, error) { } // parsePackageJSON parses a package.json and returns the discovered JavaScript packages. -func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { +func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) { var packages []*pkg.Package dec := json.NewDecoder(reader) @@ -175,7 +175,7 @@ func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Re } if !p.hasNameAndVersionValues() { - log.Debug("encountered package.json file without a name and/or version field, ignoring this file") + log.Debugf("encountered package.json file without a name and/or version field, ignoring (path=%q)", path) return nil, nil, nil } diff --git a/syft/pkg/cataloger/search_config.go b/syft/pkg/cataloger/search_config.go new file mode 100644 index 000000000..f92dc9928 --- /dev/null +++ b/syft/pkg/cataloger/search_config.go @@ -0,0 +1,17 @@ +package cataloger + +import "github.com/anchore/syft/syft/source" + +type SearchConfig struct { + IncludeIndexedArchives bool + IncludeUnindexedArchives bool + Scope source.Scope +} + +func DefaultSearchConfig() SearchConfig { + return SearchConfig{ + IncludeIndexedArchives: true, + IncludeUnindexedArchives: false, + Scope: source.SquashedScope, + } +} diff --git a/syft/pkg/relationships_by_file_ownership.go b/syft/pkg/relationships_by_file_ownership.go index 62ba262c8..691dbbacb 100644 --- a/syft/pkg/relationships_by_file_ownership.go +++ b/syft/pkg/relationships_by_file_ownership.go @@ -3,7 +3,7 @@ package pkg import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" - "github.com/bmatcuk/doublestar/v2" + "github.com/bmatcuk/doublestar/v4" "github.com/scylladb/go-set/strset" ) diff --git a/syft/source/mock_resolver.go b/syft/source/mock_resolver.go index 114233806..51d7edc49 100644 --- a/syft/source/mock_resolver.go +++ b/syft/source/mock_resolver.go @@ -5,7 +5,7 
@@ import ( "io" "os" - "github.com/bmatcuk/doublestar/v2" + "github.com/bmatcuk/doublestar/v4" ) var _ FileResolver = (*MockResolver)(nil) diff --git a/syft/source/source.go b/syft/source/source.go index 236f70315..2f3ab8e80 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -16,7 +16,7 @@ import ( "github.com/anchore/stereoscope" "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/internal/log" - "github.com/bmatcuk/doublestar/v2" + "github.com/bmatcuk/doublestar/v4" "github.com/mholt/archiver/v3" "github.com/spf13/afero" ) diff --git a/test/cli/packages_cmd_test.go b/test/cli/packages_cmd_test.go index a605995b7..30fe87005 100644 --- a/test/cli/packages_cmd_test.go +++ b/test/cli/packages_cmd_test.go @@ -158,6 +158,21 @@ func TestPackagesCmdFlags(t *testing.T) { assertSuccessfulReturnCode, }, }, + { + name: "responds-to-package-cataloger-search-options", + args: []string{"packages", "-vv"}, + env: map[string]string{ + "SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES": "true", + "SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES": "false", + }, + assertions: []traitAssertion{ + // the application config in the log matches that of what we expect to have been configured. Note: + // we are not testing further wiring of this option, only that the config responds to + // package-cataloger-level options. 
+ assertInOutput("search-unindexed-archives: true"), + assertInOutput("search-indexed-archives: false"), + }, + }, } for _, test := range tests { diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 5ba3c9103..1ef285a86 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -20,7 +20,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName) var pc *pkg.Catalog - for _, c := range cataloger.ImageCatalogers() { + for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) { // in case of future alteration where state is persisted, assume no dependency is safe to reuse theSource, cleanupSource, err := source.New("docker-archive:"+tarPath, nil, nil) b.Cleanup(cleanupSource) diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 5a52c1a97..6a6c65d09 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -3,6 +3,8 @@ package integration import ( "testing" + "github.com/anchore/syft/syft/pkg/cataloger" + "github.com/anchore/syft/syft/sbom" "github.com/anchore/stereoscope/pkg/imagetest" @@ -20,7 +22,10 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou t.Fatalf("unable to get source: %+v", err) } - pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, source.SquashedScope) + // TODO: this would be better with functional options (after/during API refactor) + c := cataloger.DefaultConfig() + c.Search.Scope = source.SquashedScope + pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) if err != nil { t.Fatalf("failed to catalog image: %+v", err) } @@ -51,7 +56,10 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) { t.Fatalf("unable to get source: %+v", err) } - pkgCatalog, relationships, actualDistro, err := 
syft.CatalogPackages(theSource, source.AllLayersScope) + // TODO: this would be better with functional options (after/during API refactor) + c := cataloger.DefaultConfig() + c.Search.Scope = source.AllLayersScope + pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c) if err != nil { t.Fatalf("failed to catalog image: %+v", err) }