Add support for searching for jars within archives (#734)

* add support for searching jars within archives

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add package cataloger config options

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* address review comments + factor out safeCopy helper

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update config docs regarding package archive search options

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* show that unindexed archive cataloging defaults to false

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* remove lies about -s

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* address review comments

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update search archive note about java

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-01-06 16:40:51 -05:00 committed by GitHub
parent 01dc78ccc3
commit 38c4b17847
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 530 additions and 109 deletions

View File

@ -244,6 +244,18 @@ exclude:
# cataloging packages is exposed through the packages and power-user subcommands
package:
# search within archives that do contain a file index to search against (zip)
# note: for now this only applies to the java package cataloger
# SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES env var
search-indexed-archives: true
# search within archives that do not contain a file index to search against (tar, tar.gz, tar.bz2, etc)
# note: enabling this may result in a performance impact since all discovered compressed tars will be decompressed
# note: for now this only applies to the java package cataloger
# SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES env var
search-unindexed-archives: false
cataloger:
# enable/disable cataloging of packages
# SYFT_PACKAGE_CATALOGER_ENABLED env var

View File

@ -17,6 +17,7 @@ import (
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/format"
"github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
"github.com/pkg/profile"
@ -98,7 +99,7 @@ func setPackageFlags(flags *pflag.FlagSet) {
// Formatting & Input options //////////////////////////////////////////////
flags.StringP(
"scope", "s", source.SquashedScope.String(),
"scope", "s", cataloger.DefaultSearchConfig().Scope.String(),
fmt.Sprintf("selection of layers to catalog, options=%v", source.AllScopes))
flags.StringP(

View File

@ -45,7 +45,7 @@ func generateCatalogPackagesTask() (task, error) {
}
task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
packageCatalog, relationships, theDistro, err := syft.CatalogPackages(src, appConfig.Package.Cataloger.ScopeOpt)
packageCatalog, relationships, theDistro, err := syft.CatalogPackages(src, appConfig.Package.ToConfig())
if err != nil {
return nil, err
}

2
go.mod
View File

@ -18,7 +18,7 @@ require (
// go: warning: github.com/andybalholm/brotli@v1.0.1: retracted by module author: occasional panics and data corruption
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/antihax/optional v1.0.0
github.com/bmatcuk/doublestar/v2 v2.0.4
github.com/bmatcuk/doublestar/v4 v4.0.2
github.com/docker/docker v20.10.11+incompatible
github.com/dustin/go-humanize v1.0.0
github.com/facebookincubator/nvdtools v0.1.4

2
go.sum
View File

@ -135,8 +135,6 @@ github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY
github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM=
github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/bmatcuk/doublestar/v2 v2.0.4 h1:6I6oUiT/sU27eE2OFcWqBhL1SwjyvQuOssxT4a1yidI=
github.com/bmatcuk/doublestar/v2 v2.0.4/go.mod h1:QMmcs3H2AUQICWhfzLXz+IYln8lRQmTZRptLie8RgRw=
github.com/bmatcuk/doublestar/v4 v4.0.2 h1:X0krlUVAVmtr2cRoTqR8aDMrDqnB36ht8wpWTiQ3jsA=
github.com/bmatcuk/doublestar/v4 v4.0.2/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=

View File

@ -38,7 +38,7 @@ type Application struct {
CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config)
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Package packages `yaml:"package" json:"package" mapstructure:"package"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`

View File

@ -3,6 +3,8 @@ package config
import (
"fmt"
"github.com/spf13/viper"
"github.com/anchore/syft/syft/source"
)
@ -12,6 +14,10 @@ type catalogerOptions struct {
ScopeOpt source.Scope `yaml:"-" json:"-"`
}
func (cfg catalogerOptions) loadDefaultValues(v *viper.Viper) {
v.SetDefault("package.cataloger.enabled", true)
}
func (cfg *catalogerOptions) parseConfigValues() error {
scopeOption := source.ParseScope(cfg.Scope)
if scopeOption == source.UnknownScope {

View File

@ -1,15 +0,0 @@
package config
import "github.com/spf13/viper"
type packages struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
}
func (cfg packages) loadDefaultValues(v *viper.Viper) {
v.SetDefault("package.cataloger.enabled", true)
}
func (cfg *packages) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

33
internal/config/pkg.go Normal file
View File

@ -0,0 +1,33 @@
package config
import (
"github.com/anchore/syft/syft/pkg/cataloger"
"github.com/spf13/viper"
)
// pkg is the "package" application config section (the `package:` yaml key),
// controlling how package cataloging is performed.
type pkg struct {
// Cataloger holds the enable/disable + scope options shared by the package catalogers
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
// SearchUnindexedArchives: search within archives that lack a file index (e.g. tar); potentially expensive
SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"`
// SearchIndexedArchives: search within archives that have a file index (e.g. zip)
SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"`
}

// loadDefaultValues registers viper defaults for this section; the archive-search
// defaults are sourced from cataloger.DefaultSearchConfig so the two stay in sync.
func (cfg pkg) loadDefaultValues(v *viper.Viper) {
cfg.Cataloger.loadDefaultValues(v)
c := cataloger.DefaultSearchConfig()
v.SetDefault("package.search-unindexed-archives", c.IncludeUnindexedArchives)
v.SetDefault("package.search-indexed-archives", c.IncludeIndexedArchives)
}

// parseConfigValues post-processes raw config values (currently only the nested cataloger options).
func (cfg *pkg) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

// ToConfig converts this application config section into the cataloger package's Config type.
func (cfg pkg) ToConfig() cataloger.Config {
return cataloger.Config{
Search: cataloger.SearchConfig{
IncludeIndexedArchives: cfg.SearchIndexedArchives,
IncludeUnindexedArchives: cfg.SearchUnindexedArchives,
Scope: cfg.Cataloger.ScopeOpt,
},
}
}

19
internal/file/copy.go Normal file
View File

@ -0,0 +1,19 @@
package file
import (
"errors"
"fmt"
"io"
)
// perFileReadLimit caps how many bytes may be copied out of any single archived
// file, guarding against decompression bomb attacks.
const perFileReadLimit = 2 * GB

// safeCopy copies from reader to writer, limiting the total bytes copied. This is
// useful when extracting files from archives to protect against decompression
// bomb attacks. Returns an error if the limit is hit or the copy itself fails.
func safeCopy(writer io.Writer, reader io.Reader) error {
	numBytes, err := io.Copy(writer, io.LimitReader(reader, perFileReadLimit))
	if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
		return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
	}
	// bug fix: the original returned nil here, silently swallowing genuine
	// read/write failures and potentially leaving a truncated extracted file
	if err != nil {
		return fmt.Errorf("unable to copy contents: %w", err)
	}
	return nil
}

View File

@ -0,0 +1,64 @@
package file
import (
"fmt"
"io/ioutil"
"path/filepath"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archiver/v3"
)
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
// Note: nothing here removes the extracted temp files; cleanup is the caller's responsibility (typically by removing dir).
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths
if len(globs) == 0 {
return results, nil
}
// visitor is invoked once per entry while walking the tar archive
visitor := func(file archiver.File) error {
defer file.Close()
// ignore directories
if file.FileInfo.IsDir() {
return nil
}
// ignore any filename that doesn't match the given globs...
if !matchesAnyGlob(file.Name(), globs...) {
return nil
}
// we have a file we want to extract....
tempfilePrefix := filepath.Base(filepath.Clean(file.Name())) + "-"
tempFile, err := ioutil.TempFile(dir, tempfilePrefix)
if err != nil {
return fmt.Errorf("unable to create temp file: %w", err)
}
// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close()
// copy with a read limit to guard against decompression bombs
if err := safeCopy(tempFile, file.ReadCloser); err != nil {
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
}
// note: entries with duplicate names within the archive overwrite earlier results in the map
results[file.Name()] = Opener{path: tempFile.Name()}
return nil
}
return results, archiver.Walk(archivePath, visitor)
}
// matchesAnyGlob reports whether name matches at least one of the given glob
// patterns. A pattern that fails to evaluate is simply treated as a non-match.
func matchesAnyGlob(name string, globs ...string) bool {
	for _, pattern := range globs {
		ok, err := doublestar.PathMatch(pattern, name)
		if err != nil {
			// invalid patterns never match
			continue
		}
		if ok {
			return true
		}
	}
	return false
}

View File

@ -3,9 +3,7 @@ package file
import (
"archive/zip"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -22,8 +20,6 @@ const (
GB
)
const perFileReadLimit = 2 * GB
type errZipSlipDetected struct {
Prefix string
JoinArgs []string
@ -110,21 +106,10 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
}
// limit the zip reader on each file read to prevent decompression bomb attacks
numBytes, err := io.Copy(tempFile, io.LimitReader(zippedFile, perFileReadLimit))
if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
}
if err != nil {
if err := safeCopy(tempFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
}
// the file pointer is at the end due to the copy operation, reset back to the beginning
_, err = tempFile.Seek(0, io.SeekStart)
if err != nil {
return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err)
}
results[file.Name] = Opener{path: tempFile.Name()}
return nil
@ -153,13 +138,7 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
}
var buffer bytes.Buffer
// limit the zip reader on each file read to prevent decompression bomb attacks
numBytes, err := io.Copy(&buffer, io.LimitReader(zippedFile, perFileReadLimit))
if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
}
if err != nil {
if err := safeCopy(&buffer, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
}
@ -228,12 +207,7 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
}
// limit the zip reader on each file read to prevent decompression bomb attacks
numBytes, err := io.Copy(outputFile, io.LimitReader(zippedFile, perFileReadLimit))
if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
}
if err != nil {
if err := safeCopy(outputFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
}

View File

@ -4,7 +4,7 @@ import (
"fmt"
"regexp"
"github.com/bmatcuk/doublestar/v2"
"github.com/bmatcuk/doublestar/v4"
"github.com/hashicorp/go-multierror"
)

View File

@ -1,5 +1,5 @@
/*
A "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library.
Package syft is a "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library.
Here is what the main execution path for syft does:
@ -34,8 +34,8 @@ import (
// CatalogPackages takes an inventory of packages from the given image from a particular perspective
// (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux
// distribution, and the source object used to wrap the data source.
func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, error) {
resolver, err := src.FileResolver(scope)
func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Catalog, []artifact.Relationship, *distro.Distro, error) {
resolver, err := src.FileResolver(cfg.Search.Scope)
if err != nil {
return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err)
}
@ -53,13 +53,13 @@ func CatalogPackages(src *source.Source, scope source.Scope) (*pkg.Catalog, []ar
switch src.Metadata.Scheme {
case source.ImageScheme:
log.Info("cataloging image")
catalogers = cataloger.ImageCatalogers()
catalogers = cataloger.ImageCatalogers(cfg)
case source.FileScheme:
log.Info("cataloging file")
catalogers = cataloger.AllCatalogers()
catalogers = cataloger.AllCatalogers(cfg)
case source.DirectoryScheme:
log.Info("cataloging directory")
catalogers = cataloger.DirectoryCatalogers()
catalogers = cataloger.DirectoryCatalogers(cfg)
default:
return nil, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme)
}

View File

@ -32,7 +32,7 @@ type Cataloger interface {
}
// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages.
func ImageCatalogers() []Cataloger {
func ImageCatalogers(cfg Config) []Cataloger {
return []Cataloger{
ruby.NewGemSpecCataloger(),
python.NewPythonPackageCataloger(),
@ -40,14 +40,14 @@ func ImageCatalogers() []Cataloger {
javascript.NewJavascriptPackageCataloger(),
deb.NewDpkgdbCataloger(),
rpmdb.NewRpmdbCataloger(),
java.NewJavaCataloger(),
java.NewJavaCataloger(cfg.Java()),
apkdb.NewApkdbCataloger(),
golang.NewGoModuleBinaryCataloger(),
}
}
// DirectoryCatalogers returns a slice of locally implemented catalogers that are fit for detecting packages from index files (and select installations)
func DirectoryCatalogers() []Cataloger {
func DirectoryCatalogers(cfg Config) []Cataloger {
return []Cataloger{
ruby.NewGemFileLockCataloger(),
python.NewPythonIndexCataloger(),
@ -56,7 +56,7 @@ func DirectoryCatalogers() []Cataloger {
javascript.NewJavascriptLockCataloger(),
deb.NewDpkgdbCataloger(),
rpmdb.NewRpmdbCataloger(),
java.NewJavaCataloger(),
java.NewJavaCataloger(cfg.Java()),
apkdb.NewApkdbCataloger(),
golang.NewGoModuleBinaryCataloger(),
golang.NewGoModFileCataloger(),
@ -65,7 +65,7 @@ func DirectoryCatalogers() []Cataloger {
}
// AllCatalogers returns all implemented catalogers
func AllCatalogers() []Cataloger {
func AllCatalogers(cfg Config) []Cataloger {
return []Cataloger{
ruby.NewGemFileLockCataloger(),
ruby.NewGemSpecCataloger(),
@ -75,7 +75,7 @@ func AllCatalogers() []Cataloger {
javascript.NewJavascriptPackageCataloger(),
deb.NewDpkgdbCataloger(),
rpmdb.NewRpmdbCataloger(),
java.NewJavaCataloger(),
java.NewJavaCataloger(cfg.Java()),
apkdb.NewApkdbCataloger(),
golang.NewGoModuleBinaryCataloger(),
golang.NewGoModFileCataloger(),

View File

@ -0,0 +1,22 @@
package cataloger
import (
"github.com/anchore/syft/syft/pkg/cataloger/java"
)
// Config is the top-level cataloger configuration, handed to the cataloger
// constructors to tailor their search behavior.
type Config struct {
Search SearchConfig
}

// DefaultConfig returns a Config populated with the default search options.
func DefaultConfig() Config {
return Config{
Search: DefaultSearchConfig(),
}
}

// Java maps the generic search options onto the java cataloger's own config type.
func (c Config) Java() java.Config {
return java.Config{
SearchUnindexedArchives: c.Search.IncludeUnindexedArchives,
SearchIndexedArchives: c.Search.IncludeIndexedArchives,
}
}

View File

@ -64,7 +64,11 @@ func uniquePkgKey(p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them.
func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested bool) (*archiveParser, func(), error) {
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader)
// fetch the last element of the virtual path
virtualElements := strings.Split(virtualPath, ":")
currentFilepath := virtualElements[len(virtualElements)-1]
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(currentFilepath, reader)
if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
}
@ -74,10 +78,6 @@ func newJavaArchiveParser(virtualPath string, reader io.Reader, detectNested boo
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
}
// fetch the last element of the virtual path
virtualElements := strings.Split(virtualPath, ":")
currentFilepath := virtualElements[len(virtualElements)-1]
return &archiveParser{
fileManifest: fileManifest,
virtualPath: virtualPath,
@ -198,34 +198,33 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
return pkgs, nil
}
// discoverPkgsFromNestedArchives finds Java archives within Java archives, returning all listed Java packages found and
// associating each discovered package to the given parent package.
func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) {
var pkgs []*pkg.Package
var relationships []artifact.Relationship
// we know that all java archives are zip formatted files, so we can use the shared zip helper
return discoverPkgsFromZip(j.virtualPath, j.archivePath, j.contentPath, j.fileManifest, parentPkg)
}
// discoverPkgsFromZip finds Java archives within Java archives, returning all listed Java packages found and
// associating each discovered package to the given parent package.
func discoverPkgsFromZip(virtualPath, archivePath, contentPath string, fileManifest file.ZipFileManifest, parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents
openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
openers, err := file.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(archiveFormatGlobs...)...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
}
// discover nested artifacts
for archivePath, archiveOpener := range openers {
archiveReadCloser, err := archiveOpener.Open()
return discoverPkgsFromOpeners(virtualPath, openers, parentPkg)
}
// discoverPkgsFromOpeners finds Java archives within the given files and associates them with the given parent package.
func discoverPkgsFromOpeners(virtualPath string, openers map[string]file.Opener, parentPkg *pkg.Package) ([]*pkg.Package, []artifact.Relationship, error) {
var pkgs []*pkg.Package
var relationships []artifact.Relationship
for pathWithinArchive, archiveOpener := range openers {
nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(virtualPath, pathWithinArchive, archiveOpener)
if err != nil {
return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
}
nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath)
nestedPkgs, nestedRelationships, err := parseJavaArchive(nestedPath, archiveReadCloser)
if err != nil {
if closeErr := archiveReadCloser.Close(); closeErr != nil {
log.Warnf("unable to close archived file from tempdir: %+v", closeErr)
}
return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err)
}
if err = archiveReadCloser.Close(); err != nil {
return nil, nil, fmt.Errorf("unable to close archived file from tempdir: %w", err)
log.Warnf("unable to discover java packages from opener (%s): %+v", virtualPath, err)
continue
}
// attach the parent package to all discovered packages that are not already associated with a java archive
@ -245,6 +244,27 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
return pkgs, relationships, nil
}
// discoverPkgsFromOpener finds Java archives within the given file.
// virtualPath identifies the containing archive, pathWithinArchive is where this
// file lived inside it, and archiveOpener yields a reader over the extracted temp file.
func discoverPkgsFromOpener(virtualPath, pathWithinArchive string, archiveOpener file.Opener) ([]*pkg.Package, []artifact.Relationship, error) {
archiveReadCloser, err := archiveOpener.Open()
if err != nil {
return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
}
// always release the temp file handle; a close failure is only logged since the parse result is unaffected
defer func() {
if closeErr := archiveReadCloser.Close(); closeErr != nil {
log.Warnf("unable to close archived file from tempdir: %+v", closeErr)
}
}()
// nesting is tracked via a ":"-delimited virtual path (e.g. "outer.tar:inner.jar")
nestedPath := fmt.Sprintf("%s:%s", virtualPath, pathWithinArchive)
nestedPkgs, nestedRelationships, err := parseJavaArchive(nestedPath, archiveReadCloser)
if err != nil {
return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err)
}
return nestedPkgs, nestedRelationships, nil
}
func pomPropertiesByParentPath(archivePath string, extractPaths []string, virtualPath string) (map[string]pkg.PomProperties, error) {
contentsOfMavenPropertiesFiles, err := file.ContentsFromZip(archivePath, extractPaths...)
if err != nil {

View File

@ -6,18 +6,17 @@ import (
"io"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep"
"github.com/gookit/color"
"github.com/stretchr/testify/assert"
)
func generateJavaBuildFixture(t *testing.T, fixturePath string) {
@ -227,7 +226,7 @@ func TestParseJar(t *testing.T) {
}
for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) {
t.Run(path.Base(test.fixture), func(t *testing.T) {
generateJavaBuildFixture(t, test.fixture)

View File

@ -8,11 +8,27 @@ import (
)
// NewJavaCataloger returns a new Java archive cataloger object.
func NewJavaCataloger() *common.GenericCataloger {
func NewJavaCataloger(cfg Config) *common.GenericCataloger {
globParsers := make(map[string]common.ParserFn)
// java archive formats
for _, pattern := range archiveFormatGlobs {
globParsers[pattern] = parseJavaArchive
}
if cfg.SearchIndexedArchives {
// java archives wrapped within zip files
for _, pattern := range genericZipGlobs {
globParsers[pattern] = parseZipWrappedJavaArchive
}
}
if cfg.SearchUnindexedArchives {
// java archives wrapped within tar files
for _, pattern := range genericTarGlobs {
globParsers[pattern] = parseTarWrappedJavaArchive
}
}
return common.NewGenericCataloger(nil, globParsers, "java-cataloger")
}

View File

@ -0,0 +1,6 @@
package java
// Config contains the java cataloger search options.
type Config struct {
// SearchUnindexedArchives: also search for java archives within archives that lack a file index (e.g. tars)
SearchUnindexedArchives bool
// SearchIndexedArchives: also search for java archives within archives that have a file index (e.g. zips)
SearchIndexedArchives bool
}

View File

@ -5,25 +5,27 @@ import (
"io"
"io/ioutil"
"os"
"path"
"path/filepath"
"github.com/anchore/syft/internal/log"
)
func saveArchiveToTmp(reader io.Reader) (string, string, func(), error) {
tempDir, err := ioutil.TempDir("", "syft-jar-contents-")
func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, string, func(), error) {
name := path.Base(archiveVirtualPath)
tempDir, err := ioutil.TempDir("", "syft-archive-contents-")
if err != nil {
return "", "", func() {}, fmt.Errorf("unable to create tempdir for jar processing: %w", err)
return "", "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
}
cleanupFn := func() {
err = os.RemoveAll(tempDir)
if err != nil {
log.Errorf("unable to cleanup jar tempdir: %+v", err)
log.Errorf("unable to cleanup archive tempdir: %+v", err)
}
}
archivePath := filepath.Join(tempDir, "archive")
archivePath := filepath.Join(tempDir, "archive-"+name)
contentDir := filepath.Join(tempDir, "contents")
err = os.Mkdir(contentDir, 0755)

View File

@ -0,0 +1,68 @@
package java
import (
"fmt"
"io"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
// integrity check
var _ common.ParserFn = parseTarWrappedJavaArchive
var genericTarGlobs = []string{
"**/*.tar",
// gzipped tar
"**/*.tar.gz",
"**/*.tgz",
// bzip2
"**/*.tar.bz",
"**/*.tar.bz2",
"**/*.tbz",
"**/*.tbz2",
// brotli
"**/*.tar.br",
"**/*.tbr",
// lz4
"**/*.tar.lz4",
"**/*.tlz4",
// sz
"**/*.tar.sz",
"**/*.tsz",
// xz
"**/*.tar.xz",
"**/*.txz",
// zst
"**/*.tar.zst",
}
// TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)

// parseTarWrappedJavaArchive is a parser function for java archive contents contained within arbitrary tar files.
// note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is
// due to the fact that there is no central directory header (say as in zip), which means that in order to get
// a file listing within the archive you must decompress the entire archive and seek through all of the entries.
func parseTarWrappedJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
// materialize the reader to disk: archivePath is the saved archive, contentPath is a scratch dir for extracted entries
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(virtualPath, reader)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
return nil, nil, err
}
// look for java archives within the tar archive
return discoverPkgsFromTar(virtualPath, archivePath, contentPath)
}
// discoverPkgsFromTar extracts any java archive formats found within the saved tar (at archivePath)
// into contentPath and catalogs each; no parent package is associated with the discovered packages.
func discoverPkgsFromTar(virtualPath, archivePath, contentPath string) ([]*pkg.Package, []artifact.Relationship, error) {
openers, err := file.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
}
return discoverPkgsFromOpeners(virtualPath, openers, nil)
}

View File

@ -0,0 +1,53 @@
package java
import (
"os"
"path"
"testing"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/assert"
)
// Test_parseTarWrappedJavaArchive verifies that java packages are discovered
// within jars nested inside (optionally gzip-compressed) tar archives.
func Test_parseTarWrappedJavaArchive(t *testing.T) {
	tests := []struct {
		fixture  string
		expected []string
	}{
		{
			fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar",
			expected: []string{
				"example-java-app-maven",
				"joda-time",
			},
		},
		{
			fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar.gz",
			expected: []string{
				"example-java-app-maven",
				"joda-time",
			},
		},
	}

	for _, test := range tests {
		t.Run(path.Base(test.fixture), func(t *testing.T) {
			generateJavaBuildFixture(t, test.fixture)

			fixture, err := os.Open(test.fixture)
			require.NoError(t, err)
			// fix: close the fixture file handle (it was previously leaked)
			defer func() { require.NoError(t, fixture.Close()) }()

			actualPkgs, _, err := parseTarWrappedJavaArchive(test.fixture, fixture)
			require.NoError(t, err)

			var actualNames []string
			for _, p := range actualPkgs {
				actualNames = append(actualNames, p.Name)
			}

			assert.ElementsMatch(t, test.expected, actualNames)
		})
	}
}

View File

@ -1,4 +1,4 @@
/packages/sb
/packages/*
*.fingerprint
# maven when running in a volume may spit out directories like this
**/\?/

View File

@ -4,14 +4,29 @@ ifndef PKGSDIR
$(error PKGSDIR is not set)
endif
all: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar
all: jars archives
clean: clean-examples
rm -f $(PKGSDIR)/*
clean-examples: clean-gradle clean-maven clean-jenkins clean-nestedjar
.PHONY: maven gradle clean clean-gradle clean-maven clean-jenkins clean-examples clean-nestedjar
.PHONY: maven gradle clean clean-gradle clean-maven clean-jenkins clean-examples clean-nestedjar jars archives
jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar
archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz
# jars within archives...
$(PKGSDIR)/example-java-app-maven-0.1.0.zip: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
zip $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.jar
$(PKGSDIR)/example-java-app-maven-0.1.0.tar: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
tar -cvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.jar
$(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
# Nested jar...

View File

@ -0,0 +1,43 @@
package java
import (
"fmt"
"io"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
)
// integrity check
var _ common.ParserFn = parseZipWrappedJavaArchive
var genericZipGlobs = []string{
"**/*.zip",
}
// TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)

// parseZipWrappedJavaArchive is a parser function for java archive contents contained within arbitrary zip files.
func parseZipWrappedJavaArchive(virtualPath string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
// materialize the reader to disk: archivePath is the saved zip, contentPath is a scratch dir for extracted entries
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(virtualPath, reader)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
return nil, nil, err
}
// we use our zip helper functions instead of that from the archiver package or the standard lib. Why? These helper
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
// or archiver).
fileManifest, err := file.NewZipFileManifest(archivePath)
if err != nil {
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
}
// look for java archives within the zip archive
return discoverPkgsFromZip(virtualPath, archivePath, contentPath, fileManifest, nil)
}

View File

@ -0,0 +1,45 @@
package java
import (
"os"
"path"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Test_parseZipWrappedJavaArchive verifies that java packages are discovered
// within jars nested inside zip archives.
func Test_parseZipWrappedJavaArchive(t *testing.T) {
	tests := []struct {
		fixture  string
		expected []string
	}{
		{
			fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.zip",
			expected: []string{
				"example-java-app-maven",
				"joda-time",
			},
		},
	}

	for _, test := range tests {
		t.Run(path.Base(test.fixture), func(t *testing.T) {
			generateJavaBuildFixture(t, test.fixture)

			fixture, err := os.Open(test.fixture)
			require.NoError(t, err)
			// fix: close the fixture file handle (it was previously leaked)
			defer func() { require.NoError(t, fixture.Close()) }()

			actualPkgs, _, err := parseZipWrappedJavaArchive(test.fixture, fixture)
			require.NoError(t, err)

			var actualNames []string
			for _, p := range actualPkgs {
				actualNames = append(actualNames, p.Name)
			}

			assert.ElementsMatch(t, test.expected, actualNames)
		})
	}
}

View File

@ -162,7 +162,7 @@ func (p PackageJSON) licensesFromJSON() ([]string, error) {
}
// parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact.Relationship, error) {
var packages []*pkg.Package
dec := json.NewDecoder(reader)
@ -175,7 +175,7 @@ func parsePackageJSON(_ string, reader io.Reader) ([]*pkg.Package, []artifact.Re
}
if !p.hasNameAndVersionValues() {
log.Debug("encountered package.json file without a name and/or version field, ignoring this file")
log.Debugf("encountered package.json file without a name and/or version field, ignoring (path=%q)", path)
return nil, nil, nil
}

View File

@ -0,0 +1,17 @@
package cataloger
import "github.com/anchore/syft/syft/source"
// SearchConfig holds cross-cataloger options controlling where packages are searched for.
type SearchConfig struct {
// IncludeIndexedArchives: search within archives that have a file index (e.g. zip)
IncludeIndexedArchives bool
// IncludeUnindexedArchives: search within archives lacking a file index (e.g. tar); potentially expensive
IncludeUnindexedArchives bool
// Scope selects which set of files from the source is cataloged
Scope source.Scope
}

// DefaultSearchConfig returns the default search options: indexed archives only, squashed scope.
func DefaultSearchConfig() SearchConfig {
return SearchConfig{
IncludeIndexedArchives: true,
IncludeUnindexedArchives: false,
Scope: source.SquashedScope,
}
}

View File

@ -3,7 +3,7 @@ package pkg
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/bmatcuk/doublestar/v2"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
)

View File

@ -5,7 +5,7 @@ import (
"io"
"os"
"github.com/bmatcuk/doublestar/v2"
"github.com/bmatcuk/doublestar/v4"
)
var _ FileResolver = (*MockResolver)(nil)

View File

@ -16,7 +16,7 @@ import (
"github.com/anchore/stereoscope"
"github.com/anchore/stereoscope/pkg/image"
"github.com/anchore/syft/internal/log"
"github.com/bmatcuk/doublestar/v2"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archiver/v3"
"github.com/spf13/afero"
)

View File

@ -158,6 +158,21 @@ func TestPackagesCmdFlags(t *testing.T) {
assertSuccessfulReturnCode,
},
},
{
name: "responds-to-package-cataloger-search-options",
args: []string{"packages", "-vv"},
env: map[string]string{
"SYFT_PACKAGE_SEARCH_UNINDEXED_ARCHIVES": "true",
"SYFT_PACKAGE_SEARCH_INDEXED_ARCHIVES": "false",
},
assertions: []traitAssertion{
// the application config in the log matches that of what we expect to have been configured. Note:
// we are not testing further wiring of this option, only that the config responds to
// package-cataloger-level options.
assertInOutput("search-unindexed-archives: true"),
assertInOutput("search-indexed-archives: false"),
},
},
}
for _, test := range tests {

View File

@ -20,7 +20,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName)
var pc *pkg.Catalog
for _, c := range cataloger.ImageCatalogers() {
for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) {
// in case of future alteration where state is persisted, assume no dependency is safe to reuse
theSource, cleanupSource, err := source.New("docker-archive:"+tarPath, nil, nil)
b.Cleanup(cleanupSource)

View File

@ -3,6 +3,8 @@ package integration
import (
"testing"
"github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/stereoscope/pkg/imagetest"
@ -20,7 +22,10 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou
t.Fatalf("unable to get source: %+v", err)
}
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, source.SquashedScope)
// TODO: this would be better with functional options (after/during API refactor)
c := cataloger.DefaultConfig()
c.Search.Scope = source.SquashedScope
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c)
if err != nil {
t.Fatalf("failed to catalog image: %+v", err)
}
@ -51,7 +56,10 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) {
t.Fatalf("unable to get source: %+v", err)
}
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, source.AllLayersScope)
// TODO: this would be better with functional options (after/during API refactor)
c := cataloger.DefaultConfig()
c.Search.Scope = source.AllLayersScope
pkgCatalog, relationships, actualDistro, err := syft.CatalogPackages(theSource, c)
if err != nil {
t.Fatalf("failed to catalog image: %+v", err)
}