diff --git a/.gitignore b/.gitignore
index ab7501da4..e4c0ffcd7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+go.work
+go.work.sum
 /.bin
 CHANGELOG.md
 VERSION
diff --git a/README.md b/README.md
index 9a63a6d5f..70c293f67 100644
--- a/README.md
+++ b/README.md
@@ -605,7 +605,7 @@ file-metadata:
   # SYFT_FILE_METADATA_CATALOGER_SCOPE env var
   scope: "squashed"
 
-  # the file digest algorithms to use when cataloging files (options: "sha256", "md5", "sha1")
+  # the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512")
   # SYFT_FILE_METADATA_DIGESTS env var
   digests: ["sha256"]
 
@@ -643,11 +643,27 @@ secrets:
   # SYFT_SECRETS_EXCLUDE_PATTERN_NAMES env var
   exclude-pattern-names: []
 
+# options that apply to all scan sources
+source:
+  # alias name for the source
+  # SYFT_SOURCE_NAME env var; --source-name flag
+  name: ""
+
+  # alias version for the source
+  # SYFT_SOURCE_VERSION env var; --source-version flag
+  version: ""
+
+  # options affecting the file source type
+  file:
+    # the file digest algorithms to use on the scanned file (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512")
+    digests: ["sha256"]
+
 # options when pulling directly from a registry via the "registry:" scheme
 registry:
   # skip TLS verification when communicating with the registry
   # SYFT_REGISTRY_INSECURE_SKIP_TLS_VERIFY env var
   insecure-skip-tls-verify: false
+
   # use http instead of https when connecting to the registry
   # SYFT_REGISTRY_INSECURE_USE_HTTP env var
   insecure-use-http: false
diff --git a/cmd/syft/cli/attest/attest.go b/cmd/syft/cli/attest/attest.go
index 9ed452b35..cdd25ad32 100644
--- a/cmd/syft/cli/attest/attest.go
+++ b/cmd/syft/cli/attest/attest.go
@@ -18,6 +18,7 @@ import (
     "github.com/anchore/syft/cmd/syft/cli/packages"
     "github.com/anchore/syft/internal/bus"
     "github.com/anchore/syft/internal/config"
+    "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/internal/log"
     "github.com/anchore/syft/internal/ui"
     "github.com/anchore/syft/syft"
@@ -74,18 +75,23 @@ func buildSBOM(app *config.Application, userInput string, errs chan error) (*sbo
         }
     }
 
+    hashers, err := file.Hashers(app.Source.File.Digests...)
+    if err != nil {
+        return nil, fmt.Errorf("invalid hash: %w", err)
+    }
+
     src, err := detection.NewSource(
         source.DetectionSourceConfig{
             Alias: source.Alias{
-                Name:    app.SourceName,
-                Version: app.SourceVersion,
+                Name:    app.Source.Name,
+                Version: app.Source.Version,
             },
             RegistryOptions: app.Registry.ToOptions(),
             Platform:        platform,
             Exclude: source.ExcludeConfig{
                 Paths: app.Exclusions,
             },
-            DigestAlgorithms: nil,
+            DigestAlgorithms: hashers,
         },
     )
 
diff --git a/cmd/syft/cli/eventloop/tasks.go b/cmd/syft/cli/eventloop/tasks.go
index 4c0456542..b6121d0da 100644
--- a/cmd/syft/cli/eventloop/tasks.go
+++ b/cmd/syft/cli/eventloop/tasks.go
@@ -1,13 +1,10 @@
 package eventloop
 
 import (
-    "crypto"
-    "fmt"
-
     "github.com/anchore/syft/internal/config"
+    "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/syft"
     "github.com/anchore/syft/syft/artifact"
-    "github.com/anchore/syft/syft/file"
     "github.com/anchore/syft/syft/file/cataloger/filecontent"
     "github.com/anchore/syft/syft/file/cataloger/filedigest"
     "github.com/anchore/syft/syft/file/cataloger/filemetadata"
@@ -89,23 +86,9 @@ func generateCatalogFileDigestsTask(app *config.Application) (Task, error) {
         return nil, nil
     }
 
-    supportedHashAlgorithms := make(map[string]crypto.Hash)
-    for _, h := range []crypto.Hash{
-        crypto.MD5,
-        crypto.SHA1,
-        crypto.SHA256,
-    } {
-        supportedHashAlgorithms[file.DigestAlgorithmName(h)] = h
-    }
-
-    var hashes []crypto.Hash
-    for _, hashStr := range app.FileMetadata.Digests {
-        name := file.CleanDigestAlgorithmName(hashStr)
-        hashObj, ok := supportedHashAlgorithms[name]
-        if !ok {
-            return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
-        }
-        hashes = append(hashes, hashObj)
+    hashes, err := file.Hashers(app.FileMetadata.Digests...)
+    if err != nil {
+        return nil, err
     }
 
     digestsCataloger := filedigest.NewCataloger(hashes)
diff --git a/cmd/syft/cli/options/packages.go b/cmd/syft/cli/options/packages.go
index f6992a948..bea013b69 100644
--- a/cmd/syft/cli/options/packages.go
+++ b/cmd/syft/cli/options/packages.go
@@ -86,11 +86,11 @@ func bindPackageConfigOptions(flags *pflag.FlagSet, v *viper.Viper) error {
         return err
     }
 
-    if err := v.BindPFlag("source-name", flags.Lookup("source-name")); err != nil {
+    if err := v.BindPFlag("source.name", flags.Lookup("source-name")); err != nil {
         return err
     }
 
-    if err := v.BindPFlag("source-version", flags.Lookup("source-version")); err != nil {
+    if err := v.BindPFlag("source.version", flags.Lookup("source-version")); err != nil {
         return err
     }
 
diff --git a/cmd/syft/cli/packages/packages.go b/cmd/syft/cli/packages/packages.go
index da5258193..53b2b860d 100644
--- a/cmd/syft/cli/packages/packages.go
+++ b/cmd/syft/cli/packages/packages.go
@@ -13,6 +13,7 @@ import (
     "github.com/anchore/syft/internal"
     "github.com/anchore/syft/internal/bus"
     "github.com/anchore/syft/internal/config"
+    "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/internal/ui"
     "github.com/anchore/syft/internal/version"
     "github.com/anchore/syft/syft"
@@ -77,18 +78,24 @@ func execWorker(app *config.Application, userInput string, writer sbom.Writer) <
         }
     }
 
+    hashers, err := file.Hashers(app.Source.File.Digests...)
+    if err != nil {
+        errs <- fmt.Errorf("invalid hash: %w", err)
+        return
+    }
+
     src, err := detection.NewSource(
         source.DetectionSourceConfig{
             Alias: source.Alias{
-                Name:    app.SourceName,
-                Version: app.SourceVersion,
+                Name:    app.Source.Name,
+                Version: app.Source.Version,
             },
             RegistryOptions: app.Registry.ToOptions(),
             Platform:        platform,
             Exclude: source.ExcludeConfig{
                 Paths: app.Exclusions,
             },
-            DigestAlgorithms: nil,
+            DigestAlgorithms: hashers,
         },
     )
 
diff --git a/cmd/syft/cli/poweruser/poweruser.go b/cmd/syft/cli/poweruser/poweruser.go
index dd1b758fe..e37455ece 100644
--- a/cmd/syft/cli/poweruser/poweruser.go
+++ b/cmd/syft/cli/poweruser/poweruser.go
@@ -94,8 +94,8 @@ func execWorker(app *config.Application, userInput string, writer sbom.Writer) <
     src, err := detection.NewSource(
         source.DetectionSourceConfig{
             Alias: source.Alias{
-                Name:    app.SourceName,
-                Version: app.SourceVersion,
+                Name:    app.Source.Name,
+                Version: app.Source.Version,
             },
             RegistryOptions: app.Registry.ToOptions(),
             Platform:        platform,
diff --git a/internal/config/application.go b/internal/config/application.go
index 9f3274265..ea8541002 100644
--- a/internal/config/application.go
+++ b/internal/config/application.go
@@ -61,8 +61,7 @@ type Application struct {
     Exclusions             []string  `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
     Platform               string    `yaml:"platform" json:"platform" mapstructure:"platform"`
     Name                   string    `yaml:"name" json:"name" mapstructure:"name"`
-    SourceName             string    `yaml:"source-name" json:"source-name" mapstructure:"source-name"`
-    SourceVersion          string    `yaml:"source-version" json:"source-version" mapstructure:"source-version"`
+    Source                 sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
     Parallelism            int       `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"`                                            // the number of catalog workers to run in parallel
     DefaultImagePullSource string    `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
 }
@@ -147,8 +146,8 @@ func (cfg *Application) parseConfigValues() error {
 
     if cfg.Name != "" {
         log.Warnf("name parameter is deprecated. please use: source-name. name will be removed in a future version")
-        if cfg.SourceName == "" {
-            cfg.SourceName = cfg.Name
+        if cfg.Source.Name == "" {
+            cfg.Source.Name = cfg.Name
         }
     }
 
diff --git a/internal/config/source.go b/internal/config/source.go
new file mode 100644
index 000000000..5346f994f
--- /dev/null
+++ b/internal/config/source.go
@@ -0,0 +1,17 @@
+package config
+
+import "github.com/spf13/viper"
+
+type sourceCfg struct {
+    Name    string     `json:"name" yaml:"name" mapstructure:"name"`
+    Version string     `json:"version" yaml:"version" mapstructure:"version"`
+    File    fileSource `json:"file" yaml:"file" mapstructure:"file"`
+}
+
+type fileSource struct {
+    Digests []string `json:"digests" yaml:"digests" mapstructure:"digests"`
+}
+
+func (cfg sourceCfg) loadDefaultValues(v *viper.Viper) {
+    v.SetDefault("source.file.digests", []string{"sha256"})
+}
diff --git a/internal/file/digest.go b/internal/file/digest.go
new file mode 100644
index 000000000..4bc8c4232
--- /dev/null
+++ b/internal/file/digest.go
@@ -0,0 +1,76 @@
+package file
+
+import (
+    "crypto"
+    "fmt"
+    "hash"
+    "io"
+    "strings"
+
+    "github.com/anchore/syft/syft/file"
+)
+
+func supportedHashAlgorithms() []crypto.Hash {
+    return []crypto.Hash{
+        crypto.MD5,
+        crypto.SHA1,
+        crypto.SHA224,
+        crypto.SHA256,
+        crypto.SHA384,
+        crypto.SHA512,
+    }
+}
+
+func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
+    // create a set of hasher objects tied together with a single writer to feed content into
+    hashers := make([]hash.Hash, len(hashes))
+    writers := make([]io.Writer, len(hashes))
+    for idx, hashObj := range hashes {
+        hashers[idx] = hashObj.New()
+        writers[idx] = hashers[idx]
+    }
+
+    size, err := io.Copy(io.MultiWriter(writers...), closer)
+    if err != nil {
+        return nil, err
+    }
+
+    if size == 0 {
+        return make([]file.Digest, 0), nil
+    }
+
+    result := make([]file.Digest, len(hashes))
+    // only capture digests when there is content. It is important to do this based on SIZE and not
+    // FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
+    // file type but a body is still allowed.
+    for idx, hasher := range hashers {
+        result[idx] = file.Digest{
+            Algorithm: CleanDigestAlgorithmName(hashes[idx].String()),
+            Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
+        }
+    }
+
+    return result, nil
+}
+
+func Hashers(names ...string) ([]crypto.Hash, error) {
+    hashByName := make(map[string]crypto.Hash)
+    for _, h := range supportedHashAlgorithms() {
+        hashByName[CleanDigestAlgorithmName(h.String())] = h
+    }
+
+    var hashers []crypto.Hash
+    for _, hashStr := range names {
+        hashObj, ok := hashByName[CleanDigestAlgorithmName(hashStr)]
+        if !ok {
+            return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
+        }
+        hashers = append(hashers, hashObj)
+    }
+    return hashers, nil
+}
+
+func CleanDigestAlgorithmName(name string) string {
+    lower := strings.ToLower(name)
+    return strings.ReplaceAll(lower, "-", "")
+}
diff --git a/internal/file/digest_test.go b/internal/file/digest_test.go
new file mode 100644
index 000000000..df50798f2
--- /dev/null
+++ b/internal/file/digest_test.go
@@ -0,0 +1,132 @@
+package file
+
+import (
+    "crypto"
+    "os"
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
+
+    "github.com/anchore/syft/syft/file"
+)
+
+func TestCleanDigestAlgorithmName(t *testing.T) {
+    tests := []struct {
+        name  string
+        input string
+        want  string
+    }{
+        {
+            name:  "go case",
+            input: "SHA-256",
+            want:  "sha256",
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            assert.Equal(t, tt.want, CleanDigestAlgorithmName(tt.input))
+        })
+    }
+}
+
+func TestNewDigestsFromFile(t *testing.T) {
+    require.NotEmpty(t, supportedHashAlgorithms())
+
+    tests := []struct {
+        name    string
+        fixture string
+        hashes  []crypto.Hash
+        want    []file.Digest
+        wantErr require.ErrorAssertionFunc
+    }{
+        {
+            name:    "check supported hash algorithms",
+            fixture: "test-fixtures/digest.txt",
+            hashes:  supportedHashAlgorithms(),
+            want: []file.Digest{
+                {
+                    Algorithm: "md5",
+                    Value:     "e8818a24402ae7f8b874cdd9350c1b51",
+                },
+                {
+                    Algorithm: "sha1",
+                    Value:     "eea4671d168c81fd52e615ed9fb3531a526f4748",
+                },
+                {
+                    Algorithm: "sha224",
+                    Value:     "fd993e84c7afb449d34bcae7c5ee118f5c73b50170da05171523b22c",
+                },
+                {
+                    Algorithm: "sha256",
+                    Value:     "cbf1a703b7e4a67529d6e17114880dfa9f879f3749872e1a9d4a20ac509165ad",
+                },
+                {
+                    Algorithm: "sha384",
+                    Value:     "1eaded3f17fb8d7b731c9175a0f355d3a35575c3cb6cdda46a5272b632968d7257a5e6437d0efae599a81a1b2dcc81ba",
+                },
+                {
+                    Algorithm: "sha512",
+                    Value:     "b49d5995456edba144dce750eaa8eae12af8fd08c076d401fcf78aac4172080feb70baaa5ed8c1b05046ec278446330fbf77e8ca9e60c03945ded761a641a7e1",
+                },
+            },
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            if tt.wantErr == nil {
+                tt.wantErr = require.NoError
+            }
+
+            fh, err := os.Open(tt.fixture)
+            require.NoError(t, err)
+
+            got, err := NewDigestsFromFile(fh, tt.hashes)
+            tt.wantErr(t, err)
+            if err != nil {
+                return
+            }
+            assert.Equal(t, tt.want, got)
+        })
+    }
+}
+
+func TestHashers(t *testing.T) {
+    tests := []struct {
+        name    string
+        names   []string
+        want    []crypto.Hash
+        wantErr require.ErrorAssertionFunc
+    }{
+        {
+            name:  "check supported hash algorithms",
+            names: []string{"MD-5", "shA1", "sHa224", "sha---256", "sha384", "sha512"},
+            want: []crypto.Hash{
+                crypto.MD5,
+                crypto.SHA1,
+                crypto.SHA224,
+                crypto.SHA256,
+                crypto.SHA384,
+                crypto.SHA512,
+            },
+        },
+        {
+            name:    "error on unsupported hash algorithm",
+            names:   []string{"made-up"},
+            wantErr: require.Error,
+        },
+    }
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            if tt.wantErr == nil {
+                tt.wantErr = require.NoError
+            }
+            got, err := Hashers(tt.names...)
+            tt.wantErr(t, err)
+            if err != nil {
+                return
+            }
+            assert.Equal(t, tt.want, got)
+        })
+    }
+}
diff --git a/internal/file/test-fixtures/digest.txt b/internal/file/test-fixtures/digest.txt
new file mode 100644
index 000000000..887a7ea37
--- /dev/null
+++ b/internal/file/test-fixtures/digest.txt
@@ -0,0 +1 @@
+hello, file!
\ No newline at end of file
diff --git a/syft/file/cataloger/filedigest/cataloger.go b/syft/file/cataloger/filedigest/cataloger.go
index e06c05a35..31a4367ab 100644
--- a/syft/file/cataloger/filedigest/cataloger.go
+++ b/syft/file/cataloger/filedigest/cataloger.go
@@ -10,10 +10,11 @@ import (
     stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
     "github.com/anchore/syft/internal"
     "github.com/anchore/syft/internal/bus"
+    intFile "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/internal/log"
     "github.com/anchore/syft/syft/event"
     "github.com/anchore/syft/syft/file"
-    internal2 "github.com/anchore/syft/syft/file/cataloger/internal"
+    intCataloger "github.com/anchore/syft/syft/file/cataloger/internal"
 )
 
 var ErrUndigestableFile = errors.New("undigestable file")
@@ -33,7 +34,7 @@ func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordina
     var locations []file.Location
 
     if len(coordinates) == 0 {
-        locations = internal2.AllRegularFiles(resolver)
+        locations = intCataloger.AllRegularFiles(resolver)
     } else {
         for _, c := range coordinates {
             locations = append(locations, file.NewLocationFromCoordinates(c))
@@ -82,7 +83,7 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati
     }
     defer internal.CloseAndLogError(contentReader, location.VirtualPath)
 
-    digests, err := file.NewDigestsFromFile(contentReader, i.hashes)
+    digests, err := intFile.NewDigestsFromFile(contentReader, i.hashes)
     if err != nil {
         return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
     }
diff --git a/syft/file/cataloger/filedigest/cataloger_test.go b/syft/file/cataloger/filedigest/cataloger_test.go
index 1ed1af588..9ebaceed0 100644
--- a/syft/file/cataloger/filedigest/cataloger_test.go
+++ b/syft/file/cataloger/filedigest/cataloger_test.go
@@ -13,6 +13,7 @@ import (
     stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
     "github.com/anchore/stereoscope/pkg/imagetest"
+    intFile "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/syft/file"
     "github.com/anchore/syft/syft/source"
 )
 
@@ -40,7 +41,7 @@ func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Has
             h := hash.New()
             h.Write(b)
             digests[file.NewLocation(f).Coordinates] = append(digests[file.NewLocation(f).Coordinates], file.Digest{
-                Algorithm: file.CleanDigestAlgorithmName(hash.String()),
+                Algorithm: intFile.CleanDigestAlgorithmName(hash.String()),
                 Value:     fmt.Sprintf("%x", h.Sum(nil)),
             })
         }
diff --git a/syft/file/digest.go b/syft/file/digest.go
index 23219e688..87b53dbb8 100644
--- a/syft/file/digest.go
+++ b/syft/file/digest.go
@@ -1,76 +1,6 @@
 package file
 
-import (
-    "crypto"
-    "fmt"
-    "hash"
-    "io"
-    "strings"
-)
-
 type Digest struct {
     Algorithm string `json:"algorithm"`
     Value     string `json:"value"`
 }
-
-func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
-    // create a set of hasher objects tied together with a single writer to feed content into
-    hashers := make([]hash.Hash, len(hashes))
-    writers := make([]io.Writer, len(hashes))
-    for idx, hashObj := range hashes {
-        hashers[idx] = hashObj.New()
-        writers[idx] = hashers[idx]
-    }
-
-    size, err := io.Copy(io.MultiWriter(writers...), closer)
-    if err != nil {
-        return nil, err
-    }
-
-    if size == 0 {
-        return make([]Digest, 0), nil
-    }
-
-    result := make([]Digest, len(hashes))
-    // only capture digests when there is content. It is important to do this based on SIZE and not
-    // FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
-    // file type but a body is still allowed.
-    for idx, hasher := range hashers {
-        result[idx] = Digest{
-            Algorithm: DigestAlgorithmName(hashes[idx]),
-            Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
-        }
-    }
-
-    return result, nil
-}
-
-func Hashers(names ...string) ([]crypto.Hash, error) {
-    supportedHashAlgorithms := make(map[string]crypto.Hash)
-    for _, h := range []crypto.Hash{
-        crypto.MD5,
-        crypto.SHA1,
-        crypto.SHA256,
-    } {
-        supportedHashAlgorithms[DigestAlgorithmName(h)] = h
-    }
-
-    var hashers []crypto.Hash
-    for _, hashStr := range names {
-        hashObj, ok := supportedHashAlgorithms[CleanDigestAlgorithmName(hashStr)]
-        if !ok {
-            return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
-        }
-        hashers = append(hashers, hashObj)
-    }
-    return hashers, nil
-}
-
-func DigestAlgorithmName(hash crypto.Hash) string {
-    return CleanDigestAlgorithmName(hash.String())
-}
-
-func CleanDigestAlgorithmName(name string) string {
-    lower := strings.ToLower(name)
-    return strings.ReplaceAll(lower, "-", "")
-}
diff --git a/syft/formats/common/cyclonedxhelpers/external_references.go b/syft/formats/common/cyclonedxhelpers/external_references.go
index da657de60..59f388717 100644
--- a/syft/formats/common/cyclonedxhelpers/external_references.go
+++ b/syft/formats/common/cyclonedxhelpers/external_references.go
@@ -6,6 +6,7 @@ import (
 
     "github.com/CycloneDX/cyclonedx-go"
 
+    "github.com/anchore/syft/internal/file"
     syftFile "github.com/anchore/syft/syft/file"
     "github.com/anchore/syft/syft/pkg"
 )
@@ -116,7 +117,7 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
     if ref.Hashes != nil {
         for _, hash := range *ref.Hashes {
             digests = append(digests, syftFile.Digest{
-                Algorithm: syftFile.CleanDigestAlgorithmName(string(hash.Algorithm)),
+                Algorithm: file.CleanDigestAlgorithmName(string(hash.Algorithm)),
                 Value:     hash.Value,
             })
         }
diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go
index a1efd022d..ea216e906 100644
--- a/syft/pkg/cataloger/java/archive_parser.go
+++ b/syft/pkg/cataloger/java/archive_parser.go
@@ -179,7 +179,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
     defer archiveCloser.Close()
 
     // grab and assign digest for the entire archive
-    digests, err := file.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
+    digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
     if err != nil {
         log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
     }
diff --git a/syft/source/file_source.go b/syft/source/file_source.go
index 2025d0856..5adc81d97 100644
--- a/syft/source/file_source.go
+++ b/syft/source/file_source.go
@@ -13,6 +13,7 @@ import (
     "github.com/opencontainers/go-digest"
 
     stereoFile "github.com/anchore/stereoscope/pkg/file"
+    intFile "github.com/anchore/syft/internal/file"
     "github.com/anchore/syft/internal/log"
     "github.com/anchore/syft/syft/artifact"
     "github.com/anchore/syft/syft/file"
@@ -67,7 +68,7 @@ func NewFromFile(cfg FileConfig) (*FileSource, error) {
 
     defer fh.Close()
 
-    digests, err = file.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
+    digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
     if err != nil {
         return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
     }
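
Note on the pattern consolidated above: `internal/file.Hashers` normalizes configured algorithm names (lower-cased, dashes stripped, e.g. from `source.file.digests` or `file-metadata.digests`) into `crypto.Hash` values, and `internal/file.NewDigestsFromFile` streams the content once through every requested hasher via `io.MultiWriter`. The standalone sketch below reproduces that same pattern with only the Go standard library; the helper names (`hashersByName`, `digestReader`) and the `main` wiring are illustrative stand-ins rather than part of this patch, and the real `NewDigestsFromFile` additionally returns no digests for zero-length input.

```go
package main

import (
	"crypto"
	// blank imports register each hash implementation; crypto.Hash.New panics
	// for algorithms that are not linked into the binary
	_ "crypto/md5"
	_ "crypto/sha1"
	_ "crypto/sha256" // also registers SHA-224
	_ "crypto/sha512" // also registers SHA-384
	"fmt"
	"hash"
	"io"
	"strings"
)

// hashersByName mirrors the behavior of internal/file.Hashers: normalize each
// requested name (lower-case, strip dashes) and map it to a crypto.Hash.
func hashersByName(names ...string) ([]crypto.Hash, error) {
	supported := map[string]crypto.Hash{
		"md5":    crypto.MD5,
		"sha1":   crypto.SHA1,
		"sha224": crypto.SHA224,
		"sha256": crypto.SHA256,
		"sha384": crypto.SHA384,
		"sha512": crypto.SHA512,
	}
	var out []crypto.Hash
	for _, n := range names {
		clean := strings.ReplaceAll(strings.ToLower(n), "-", "")
		h, ok := supported[clean]
		if !ok {
			return nil, fmt.Errorf("unsupported hash algorithm: %s", n)
		}
		out = append(out, h)
	}
	return out, nil
}

// digestReader mirrors the core of internal/file.NewDigestsFromFile: one
// io.Copy through an io.MultiWriter feeds every hasher in a single pass.
func digestReader(r io.Reader, hashes []crypto.Hash) (map[string]string, error) {
	hashers := make([]hash.Hash, len(hashes))
	writers := make([]io.Writer, len(hashes))
	for i, h := range hashes {
		hashers[i] = h.New()
		writers[i] = hashers[i]
	}
	if _, err := io.Copy(io.MultiWriter(writers...), r); err != nil {
		return nil, err
	}
	result := make(map[string]string, len(hashes))
	for i, h := range hashers {
		// same normalization as the cleaned algorithm names, e.g. "SHA-256" -> "sha256"
		name := strings.ReplaceAll(strings.ToLower(hashes[i].String()), "-", "")
		result[name] = fmt.Sprintf("%x", h.Sum(nil))
	}
	return result, nil
}

func main() {
	// roughly equivalent to configuring source.file.digests: ["sha256", "sha512"]
	hashes, err := hashersByName("SHA-256", "sha512")
	if err != nil {
		panic(err)
	}
	digests, err := digestReader(strings.NewReader("hello, file!"), hashes)
	if err != nil {
		panic(err)
	}
	for algo, value := range digests {
		fmt.Printf("%s: %s\n", algo, value)
	}
}
```

The blank imports matter: `crypto.Hash.New` panics unless the corresponding hash package has been linked into the binary, which is why the sketch pulls in md5, sha1, sha256 (covering SHA-224), and sha512 (covering SHA-384) explicitly.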