add file source digest support (#1914)

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2023-07-05 13:47:13 -04:00 committed by GitHub
parent 6280146c81
commit cfbb9f703b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 286 additions and 113 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
go.work
go.work.sum
/.bin
CHANGELOG.md
VERSION

View File

@ -605,7 +605,7 @@ file-metadata:
# SYFT_FILE_METADATA_CATALOGER_SCOPE env var
scope: "squashed"
# the file digest algorithms to use when cataloging files (options: "sha256", "md5", "sha1")
# the file digest algorithms to use when cataloging files (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512")
# SYFT_FILE_METADATA_DIGESTS env var
digests: ["sha256"]
@ -643,11 +643,27 @@ secrets:
# SYFT_SECRETS_EXCLUDE_PATTERN_NAMES env var
exclude-pattern-names: []
# options that apply to all scan sources
source:
# alias name for the source
# SYFT_SOURCE_NAME env var; --source-name flag
name: ""
# alias version for the source
# SYFT_SOURCE_VERSION env var; --source-version flag
version: ""
# options affecting the file source type
file:
# the file digest algorithms to use on the scanned file (options: "md5", "sha1", "sha224", "sha256", "sha384", "sha512")
digests: ["sha256"]
# options when pulling directly from a registry via the "registry:" scheme
registry:
# skip TLS verification when communicating with the registry
# SYFT_REGISTRY_INSECURE_SKIP_TLS_VERIFY env var
insecure-skip-tls-verify: false
# use http instead of https when connecting to the registry
# SYFT_REGISTRY_INSECURE_USE_HTTP env var
insecure-use-http: false

View File

@ -18,6 +18,7 @@ import (
"github.com/anchore/syft/cmd/syft/cli/packages"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/ui"
"github.com/anchore/syft/syft"
@ -74,18 +75,23 @@ func buildSBOM(app *config.Application, userInput string, errs chan error) (*sbo
}
}
hashers, err := file.Hashers(app.Source.File.Digests...)
if err != nil {
return nil, fmt.Errorf("invalid hash: %w", err)
}
src, err := detection.NewSource(
source.DetectionSourceConfig{
Alias: source.Alias{
Name: app.SourceName,
Version: app.SourceVersion,
Name: app.Source.Name,
Version: app.Source.Version,
},
RegistryOptions: app.Registry.ToOptions(),
Platform: platform,
Exclude: source.ExcludeConfig{
Paths: app.Exclusions,
},
DigestAlgorithms: nil,
DigestAlgorithms: hashers,
},
)

View File

@ -1,13 +1,10 @@
package eventloop
import (
"crypto"
"fmt"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/file/cataloger/filecontent"
"github.com/anchore/syft/syft/file/cataloger/filedigest"
"github.com/anchore/syft/syft/file/cataloger/filemetadata"
@ -89,23 +86,9 @@ func generateCatalogFileDigestsTask(app *config.Application) (Task, error) {
return nil, nil
}
supportedHashAlgorithms := make(map[string]crypto.Hash)
for _, h := range []crypto.Hash{
crypto.MD5,
crypto.SHA1,
crypto.SHA256,
} {
supportedHashAlgorithms[file.DigestAlgorithmName(h)] = h
}
var hashes []crypto.Hash
for _, hashStr := range app.FileMetadata.Digests {
name := file.CleanDigestAlgorithmName(hashStr)
hashObj, ok := supportedHashAlgorithms[name]
if !ok {
return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
}
hashes = append(hashes, hashObj)
hashes, err := file.Hashers(app.FileMetadata.Digests...)
if err != nil {
return nil, err
}
digestsCataloger := filedigest.NewCataloger(hashes)

View File

@ -86,11 +86,11 @@ func bindPackageConfigOptions(flags *pflag.FlagSet, v *viper.Viper) error {
return err
}
if err := v.BindPFlag("source-name", flags.Lookup("source-name")); err != nil {
if err := v.BindPFlag("source.name", flags.Lookup("source-name")); err != nil {
return err
}
if err := v.BindPFlag("source-version", flags.Lookup("source-version")); err != nil {
if err := v.BindPFlag("source.version", flags.Lookup("source-version")); err != nil {
return err
}

View File

@ -13,6 +13,7 @@ import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/ui"
"github.com/anchore/syft/internal/version"
"github.com/anchore/syft/syft"
@ -77,18 +78,24 @@ func execWorker(app *config.Application, userInput string, writer sbom.Writer) <
}
}
hashers, err := file.Hashers(app.Source.File.Digests...)
if err != nil {
errs <- fmt.Errorf("invalid hash: %w", err)
return
}
src, err := detection.NewSource(
source.DetectionSourceConfig{
Alias: source.Alias{
Name: app.SourceName,
Version: app.SourceVersion,
Name: app.Source.Name,
Version: app.Source.Version,
},
RegistryOptions: app.Registry.ToOptions(),
Platform: platform,
Exclude: source.ExcludeConfig{
Paths: app.Exclusions,
},
DigestAlgorithms: nil,
DigestAlgorithms: hashers,
},
)

View File

@ -94,8 +94,8 @@ func execWorker(app *config.Application, userInput string, writer sbom.Writer) <
src, err := detection.NewSource(
source.DetectionSourceConfig{
Alias: source.Alias{
Name: app.SourceName,
Version: app.SourceVersion,
Name: app.Source.Name,
Version: app.Source.Version,
},
RegistryOptions: app.Registry.ToOptions(),
Platform: platform,

View File

@ -61,8 +61,7 @@ type Application struct {
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Name string `yaml:"name" json:"name" mapstructure:"name"`
SourceName string `yaml:"source-name" json:"source-name" mapstructure:"source-name"`
SourceVersion string `yaml:"source-version" json:"source-version" mapstructure:"source-version"`
Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
}
@ -147,8 +146,8 @@ func (cfg *Application) parseConfigValues() error {
if cfg.Name != "" {
log.Warnf("name parameter is deprecated. please use: source-name. name will be removed in a future version")
if cfg.SourceName == "" {
cfg.SourceName = cfg.Name
if cfg.Source.Name == "" {
cfg.Source.Name = cfg.Name
}
}

17
internal/config/source.go Normal file
View File

@ -0,0 +1,17 @@
package config
import "github.com/spf13/viper"
// sourceCfg is the "source" section of the application configuration: an
// optional alias (name/version) applied to the scanned source, plus options
// that are specific to the file source type.
type sourceCfg struct {
	Name    string     `json:"name" yaml:"name" mapstructure:"name"`          // alias name for the source
	Version string     `json:"version" yaml:"version" mapstructure:"version"` // alias version for the source
	File    fileSource `json:"file" yaml:"file" mapstructure:"file"`          // options affecting the file source type
}
// fileSource holds options that only apply when scanning a plain file source.
type fileSource struct {
	// Digests lists the digest algorithm names (e.g. "sha256") to compute for the scanned file.
	Digests []string `json:"digests" yaml:"digests" mapstructure:"digests"`
}
// loadDefaultValues registers viper defaults for the source configuration
// section (file-source digests default to sha256 only).
func (cfg sourceCfg) loadDefaultValues(v *viper.Viper) {
	v.SetDefault("source.file.digests", []string{"sha256"})
}

76
internal/file/digest.go Normal file
View File

@ -0,0 +1,76 @@
package file
import (
"crypto"
"fmt"
"hash"
"io"
"strings"
"github.com/anchore/syft/syft/file"
)
func supportedHashAlgorithms() []crypto.Hash {
return []crypto.Hash{
crypto.MD5,
crypto.SHA1,
crypto.SHA224,
crypto.SHA256,
crypto.SHA384,
crypto.SHA512,
}
}
// NewDigestsFromFile reads the entire content from the given reader exactly
// once and returns one digest entry per requested hash algorithm. Empty
// content yields an empty (non-nil) slice. The reader is NOT closed by this
// function.
func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
	// tie all hashers together behind a single multi-writer so the content
	// only needs to be read once
	hashers := make([]hash.Hash, 0, len(hashes))
	writers := make([]io.Writer, 0, len(hashes))
	for _, h := range hashes {
		hasher := h.New()
		hashers = append(hashers, hasher)
		writers = append(writers, hasher)
	}

	size, err := io.Copy(io.MultiWriter(writers...), closer)
	if err != nil {
		return nil, err
	}

	// only capture digests when there is content. It is important to do this
	// based on SIZE and not FILE TYPE. The reasoning is that it is possible
	// for a tar to be crafted with a header-only file type but a body is
	// still allowed.
	if size == 0 {
		return make([]file.Digest, 0), nil
	}

	result := make([]file.Digest, 0, len(hashes))
	for idx, hasher := range hashers {
		result = append(result, file.Digest{
			Algorithm: CleanDigestAlgorithmName(hashes[idx].String()),
			Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
		})
	}
	return result, nil
}
// Hashers converts human-friendly digest algorithm names (e.g. "SHA-256",
// "sha256", "md5") into their crypto.Hash equivalents. An error is returned
// for any name that does not match a supported algorithm; on error the
// returned slice is nil.
func Hashers(names ...string) ([]crypto.Hash, error) {
	// index supported algorithms by normalized name; pre-size the map since
	// the number of supported algorithms is known up front
	supported := supportedHashAlgorithms()
	hashByName := make(map[string]crypto.Hash, len(supported))
	for _, h := range supported {
		hashByName[CleanDigestAlgorithmName(h.String())] = h
	}

	var hashers []crypto.Hash
	for _, hashStr := range names {
		hashObj, ok := hashByName[CleanDigestAlgorithmName(hashStr)]
		if !ok {
			return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
		}
		hashers = append(hashers, hashObj)
	}
	return hashers, nil
}
// CleanDigestAlgorithmName normalizes a digest algorithm name to the
// lowercase, hyphen-free form used throughout syft (e.g. "SHA-256" -> "sha256").
func CleanDigestAlgorithmName(name string) string {
	return strings.ReplaceAll(strings.ToLower(name), "-", "")
}

View File

@ -0,0 +1,132 @@
package file
import (
"crypto"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
)
// TestCleanDigestAlgorithmName verifies that algorithm names are normalized
// to lowercase with all hyphens removed.
func TestCleanDigestAlgorithmName(t *testing.T) {
	cases := map[string]struct {
		input string
		want  string
	}{
		"go case": {input: "SHA-256", want: "sha256"},
	}
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			assert.Equal(t, tc.want, CleanDigestAlgorithmName(tc.input))
		})
	}
}
// TestNewDigestsFromFile digests a known fixture file with every supported
// algorithm and compares the results against precomputed expected values.
func TestNewDigestsFromFile(t *testing.T) {
	// guard against supportedHashAlgorithms() silently becoming empty, which
	// would make the table case below vacuous
	require.NotEmpty(t, supportedHashAlgorithms())

	tests := []struct {
		name    string
		fixture string
		hashes  []crypto.Hash
		want    []file.Digest
		wantErr require.ErrorAssertionFunc
	}{
		{
			name:    "check supported hash algorithms",
			fixture: "test-fixtures/digest.txt",
			hashes:  supportedHashAlgorithms(),
			// expected values are in the same order as supportedHashAlgorithms()
			want: []file.Digest{
				{
					Algorithm: "md5",
					Value:     "e8818a24402ae7f8b874cdd9350c1b51",
				},
				{
					Algorithm: "sha1",
					Value:     "eea4671d168c81fd52e615ed9fb3531a526f4748",
				},
				{
					Algorithm: "sha224",
					Value:     "fd993e84c7afb449d34bcae7c5ee118f5c73b50170da05171523b22c",
				},
				{
					Algorithm: "sha256",
					Value:     "cbf1a703b7e4a67529d6e17114880dfa9f879f3749872e1a9d4a20ac509165ad",
				},
				{
					Algorithm: "sha384",
					Value:     "1eaded3f17fb8d7b731c9175a0f355d3a35575c3cb6cdda46a5272b632968d7257a5e6437d0efae599a81a1b2dcc81ba",
				},
				{
					Algorithm: "sha512",
					Value:     "b49d5995456edba144dce750eaa8eae12af8fd08c076d401fcf78aac4172080feb70baaa5ed8c1b05046ec278446330fbf77e8ca9e60c03945ded761a641a7e1",
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// cases that do not specify an error assertion expect success
			if tt.wantErr == nil {
				tt.wantErr = require.NoError
			}
			fh, err := os.Open(tt.fixture)
			require.NoError(t, err)

			got, err := NewDigestsFromFile(fh, tt.hashes)
			tt.wantErr(t, err)
			if err != nil {
				return
			}
			assert.Equal(t, tt.want, got)
		})
	}
}
// TestHashers verifies that name normalization (case folding, hyphen removal)
// is applied when resolving algorithm names, and that unknown names error out.
func TestHashers(t *testing.T) {
	tests := []struct {
		name    string
		names   []string
		want    []crypto.Hash
		wantErr require.ErrorAssertionFunc
	}{
		{
			name: "check supported hash algorithms",
			// intentionally odd casing/hyphenation to exercise normalization
			names: []string{"MD-5", "shA1", "sHa224", "sha---256", "sha384", "sha512"},
			want: []crypto.Hash{
				crypto.MD5,
				crypto.SHA1,
				crypto.SHA224,
				crypto.SHA256,
				crypto.SHA384,
				crypto.SHA512,
			},
		},
		{
			name:    "error on unsupported hash algorithm",
			names:   []string{"made-up"},
			wantErr: require.Error,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// cases that do not specify an error assertion expect success
			if tt.wantErr == nil {
				tt.wantErr = require.NoError
			}
			got, err := Hashers(tt.names...)
			tt.wantErr(t, err)
			if err != nil {
				return
			}
			assert.Equal(t, tt.want, got)
		})
	}
}

View File

@ -0,0 +1 @@
hello, file!

View File

@ -10,10 +10,11 @@ import (
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/file"
internal2 "github.com/anchore/syft/syft/file/cataloger/internal"
intCataloger "github.com/anchore/syft/syft/file/cataloger/internal"
)
var ErrUndigestableFile = errors.New("undigestable file")
@ -33,7 +34,7 @@ func (i *Cataloger) Catalog(resolver file.Resolver, coordinates ...file.Coordina
var locations []file.Location
if len(coordinates) == 0 {
locations = internal2.AllRegularFiles(resolver)
locations = intCataloger.AllRegularFiles(resolver)
} else {
for _, c := range coordinates {
locations = append(locations, file.NewLocationFromCoordinates(c))
@ -82,7 +83,7 @@ func (i *Cataloger) catalogLocation(resolver file.Resolver, location file.Locati
}
defer internal.CloseAndLogError(contentReader, location.VirtualPath)
digests, err := file.NewDigestsFromFile(contentReader, i.hashes)
digests, err := intFile.NewDigestsFromFile(contentReader, i.hashes)
if err != nil {
return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
}

View File

@ -13,6 +13,7 @@ import (
stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/stereoscope/pkg/imagetest"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/source"
)
@ -40,7 +41,7 @@ func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Has
h := hash.New()
h.Write(b)
digests[file.NewLocation(f).Coordinates] = append(digests[file.NewLocation(f).Coordinates], file.Digest{
Algorithm: file.CleanDigestAlgorithmName(hash.String()),
Algorithm: intFile.CleanDigestAlgorithmName(hash.String()),
Value: fmt.Sprintf("%x", h.Sum(nil)),
})
}

View File

@ -1,76 +1,6 @@
package file
import (
"crypto"
"fmt"
"hash"
"io"
"strings"
)
// Digest represents a single hash of file content, pairing the normalized
// algorithm name (e.g. "sha256") with the hex-encoded digest value.
type Digest struct {
	Algorithm string `json:"algorithm"`
	Value     string `json:"value"`
}
// NewDigestsFromFile reads all content from the given reader in a single pass
// and returns one Digest per requested hash algorithm. Empty content yields an
// empty (non-nil) slice. The reader is NOT closed by this function.
func NewDigestsFromFile(closer io.ReadCloser, hashes []crypto.Hash) ([]Digest, error) {
	// create a set of hasher objects tied together with a single writer to feed content into
	hashers := make([]hash.Hash, len(hashes))
	writers := make([]io.Writer, len(hashes))
	for idx, hashObj := range hashes {
		hashers[idx] = hashObj.New()
		writers[idx] = hashers[idx]
	}

	size, err := io.Copy(io.MultiWriter(writers...), closer)
	if err != nil {
		return nil, err
	}

	// only capture digests when there is content. It is important to do this based on SIZE and not
	// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
	// file type but a body is still allowed.
	if size == 0 {
		return make([]Digest, 0), nil
	}

	result := make([]Digest, len(hashes))
	for idx, hasher := range hashers {
		result[idx] = Digest{
			Algorithm: DigestAlgorithmName(hashes[idx]),
			Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
		}
	}
	return result, nil
}
// Hashers converts human-friendly digest algorithm names (e.g. "SHA-256") into
// their crypto.Hash equivalents, erroring on any unsupported name. Note: only
// md5, sha1, and sha256 are supported here.
func Hashers(names ...string) ([]crypto.Hash, error) {
	// index the supported algorithms by their normalized name
	supportedHashAlgorithms := make(map[string]crypto.Hash)
	for _, h := range []crypto.Hash{
		crypto.MD5,
		crypto.SHA1,
		crypto.SHA256,
	} {
		supportedHashAlgorithms[DigestAlgorithmName(h)] = h
	}

	var hashers []crypto.Hash
	for _, hashStr := range names {
		// normalize the requested name before lookup so "SHA-256" matches "sha256"
		hashObj, ok := supportedHashAlgorithms[CleanDigestAlgorithmName(hashStr)]
		if !ok {
			return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
		}
		hashers = append(hashers, hashObj)
	}
	return hashers, nil
}
// DigestAlgorithmName returns the normalized (lowercase, hyphen-free) name
// for the given hash algorithm.
func DigestAlgorithmName(hash crypto.Hash) string {
	rawName := hash.String()
	return CleanDigestAlgorithmName(rawName)
}
// CleanDigestAlgorithmName normalizes a digest algorithm name to lowercase
// and strips all hyphens (e.g. "SHA-256" -> "sha256").
func CleanDigestAlgorithmName(name string) string {
	return strings.ReplaceAll(strings.ToLower(name), "-", "")
}

View File

@ -6,6 +6,7 @@ import (
"github.com/CycloneDX/cyclonedx-go"
"github.com/anchore/syft/internal/file"
syftFile "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
@ -116,7 +117,7 @@ func decodeExternalReferences(c *cyclonedx.Component, metadata interface{}) {
if ref.Hashes != nil {
for _, hash := range *ref.Hashes {
digests = append(digests, syftFile.Digest{
Algorithm: syftFile.CleanDigestAlgorithmName(string(hash.Algorithm)),
Algorithm: file.CleanDigestAlgorithmName(string(hash.Algorithm)),
Value: hash.Value,
})
}

View File

@ -179,7 +179,7 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
defer archiveCloser.Close()
// grab and assign digest for the entire archive
digests, err := file.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
}

View File

@ -13,6 +13,7 @@ import (
"github.com/opencontainers/go-digest"
stereoFile "github.com/anchore/stereoscope/pkg/file"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -67,7 +68,7 @@ func NewFromFile(cfg FileConfig) (*FileSource, error) {
defer fh.Close()
digests, err = file.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
if err != nil {
return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
}