add configurable task collection backend

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
Alex Goodman committed 2022-06-06 22:02:39 -04:00
parent 078dbedfb6
commit a5dd485672
60 changed files with 1073 additions and 644 deletions
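A sketch only (not a file from this commit) of the call flow the hunks below converge on: the CLI workers now build one syft.CatalogingConfig up front from the application config and pass it down, so generateSBOM no longer reaches into global app state. Type and option names are taken from this diff.

package example

import (
	"github.com/anchore/syft/syft"
	"github.com/anchore/syft/syft/sbom"
	"github.com/anchore/syft/syft/source"
)

// generateSBOM mirrors the updated cmd helper: the cataloging config is built
// once by the caller (via appConfig.ToCatalogingConfig in the hunks below) and
// threaded in explicitly.
func generateSBOM(src *source.Source, config *syft.CatalogingConfig) (*sbom.SBOM, error) {
	return syft.Catalog(src, syft.WithConfig(*config))
}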

View File

@@ -193,6 +193,12 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st
 	go func() {
 		defer close(errs)
+
+		catalogingConfig, err := appConfig.ToCatalogingConfig()
+		if err != nil {
+			errs <- err
+			return
+		}
 		src, cleanup, err := source.NewFromRegistry(si, appConfig.Registry.ToOptions(), appConfig.Exclusions)
 		if cleanup != nil {
 			defer cleanup()
@@ -202,7 +208,7 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st
 			return
 		}
 
-		s, err := generateSBOM(src)
+		s, err := generateSBOM(src, catalogingConfig)
 		if err != nil {
 			errs <- err
 			return

View File

@@ -94,7 +94,7 @@ func init() {
 func setPackageFlags(flags *pflag.FlagSet) {
 	// Formatting & Input options //////////////////////////////////////////////
 	flags.StringP(
-		"scope", "s", syft.DefaultCatalogingConfig().Scope.String(),
+		"scope", "s", syft.DefaultCatalogingConfig().DefaultScope.String(),
 		fmt.Sprintf("selection of layers to catalog, options=%v", source.AllScopes))
 
 	flags.StringArrayP(
@@ -165,7 +165,7 @@ func bindExclusivePackagesConfigOptions(flags *pflag.FlagSet) error {
 	// note: output is not included since this configuration option is shared between multiple subcommands
 
-	if err := viper.BindPFlag("package.cataloger.scope", flags.Lookup("scope")); err != nil {
+	if err := viper.BindPFlag("scope", flags.Lookup("scope")); err != nil {
 		return err
 	}
@@ -257,15 +257,9 @@ func isVerbose() (result bool) {
 	return appConfig.CliOptions.Verbosity > 0 || isPipedInput
 }
 
-func generateSBOM(src *source.Source) (*sbom.SBOM, error) {
-	catalogingConfig, err := appConfig.ToCatalogingConfig()
-	if err != nil {
-		return nil, err
-	}
-
+func generateSBOM(src *source.Source, config *syft.CatalogingConfig) (*sbom.SBOM, error) {
 	return syft.Catalog(src,
-		syft.WithConfig(*catalogingConfig),
-		syft.WithDefaultPackageCatalogers(appConfig.Package.ToConfig()),
+		syft.WithConfig(*config),
 	)
 }
@@ -274,6 +268,12 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error {
 	go func() {
 		defer close(errs)
+
+		catalogingConfig, err := appConfig.ToCatalogingConfig()
+		if err != nil {
+			errs <- err
+			return
+		}
 		src, cleanup, err := source.New(si, appConfig.Registry.ToOptions(), appConfig.Exclusions)
 		if cleanup != nil {
 			defer cleanup()
@@ -283,7 +283,7 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error {
 			return
 		}
 
-		s, err := generateSBOM(src)
+		s, err := generateSBOM(src, catalogingConfig)
 		if err != nil {
 			errs <- err
 			return

View File

@@ -104,10 +104,17 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error {
 	go func() {
 		defer close(errs)
 
-		appConfig.Secrets.Cataloger.Enabled = true
-		appConfig.FileMetadata.Cataloger.Enabled = true
-		appConfig.FileContents.Cataloger.Enabled = true
-		appConfig.FileClassification.Cataloger.Enabled = true
+		// TODO: replace
+		//appConfig.Secrets.Cataloger.Enabled = true
+		//appConfig.FileMetadata.Cataloger.Enabled = true
+		//appConfig.FileContents.Cataloger.Enabled = true
+		//appConfig.FileClassification.Cataloger.Enabled = true
+
+		catalogingConfig, err := appConfig.ToCatalogingConfig()
+		if err != nil {
+			errs <- err
+			return
+		}
 
 		si, err := source.ParseInput(userInput, appConfig.Platform, true)
 		if err != nil {
@@ -124,7 +131,7 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error {
 			defer cleanup()
 		}
 
-		s, err := generateSBOM(src)
+		s, err := generateSBOM(src, catalogingConfig)
 		if err != nil {
 			errs <- err
 			return

View File

@@ -54,7 +54,7 @@ func (m *mockPackageSBOMImportAPI) ImportImagePackages(ctx context.Context, sess
 func sbomFixture() sbom.SBOM {
 	return sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog: pkg.NewCollection(pkg.Package{
+			Packages: pkg.NewCollection(pkg.Package{
 				Name:    "name",
 				Version: "version",
 				FoundBy: "foundBy",

View File

@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"github.com/anchore/syft/syft/cataloger/files/fileclassifier"
+	"github.com/anchore/syft/syft/source"
 	"path"
 	"reflect"
 	"strings"
@@ -21,6 +22,7 @@ import (
 var ErrApplicationConfigNotFound = fmt.Errorf("application config not found")
 
+// TODO: set all catalogers when this is set
 var catalogerEnabledDefault = false
 
 type defaultValueLoader interface {
@@ -33,24 +35,24 @@ type parser interface {
 // Application is the main syft application configuration.
 type Application struct {
 	ConfigPath        string         `yaml:",omitempty" json:"configPath"` // the location where the application config was read from (either from -c or discovered while loading)
 	Outputs           []string       `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output
 	File              string         `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to
 	Quiet             bool           `yaml:"quiet" json:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI)
 	CheckForAppUpdate bool           `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
 	Anchore           anchore        `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise
 	CliOptions        CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config)
+	Scope             string         `yaml:"scope" json:"scope" mapstructure:"scope"`
 	Dev               development    `yaml:"dev" json:"dev" mapstructure:"dev"`
 	Log               logging        `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
 	Package           pkg            `yaml:"package" json:"package" mapstructure:"package"`
-	FileMetadata       FileMetadata       `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
-	FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
+	FileMetadata      fileMetadata   `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
 	FileContents      fileContents   `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
 	Secrets           secretsCfg     `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
 	Registry          registry       `yaml:"registry" json:"registry" mapstructure:"registry"`
 	Exclusions        []string       `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
 	Attest            attest         `yaml:"attest" json:"attest" mapstructure:"attest"`
 	Platform          string         `yaml:"platform" json:"platform" mapstructure:"platform"`
 }
 
 func (cfg Application) ToCatalogingConfig() (*syft.CatalogingConfig, error) {
@@ -59,26 +61,33 @@ func (cfg Application) ToCatalogingConfig() (*syft.CatalogingConfig, error) {
 		return nil, fmt.Errorf("unable to parse config item 'file-metadata.digests': %w", err)
 	}
 
+	scopeOption := source.ParseScope(cfg.Scope)
+	if scopeOption == source.UnknownScope {
+		return nil, fmt.Errorf("bad scope value %q", cfg.Scope)
+	}
+
 	secretsConfig, err := cfg.Secrets.ToConfig()
 	if err != nil {
 		return nil, err
 	}
 
+	secretsScopeOption := source.ParseScope(cfg.Secrets.Scope)
+	if secretsScopeOption == source.UnknownScope {
+		return nil, fmt.Errorf("bad scope value %q", cfg.Secrets.Scope)
+	}
+
 	return &syft.CatalogingConfig{
 		// note: package catalogers cannot be determined until runtime
 		ToolName:             internal.ApplicationName,
 		ToolVersion:          version.FromBuild().Version,
 		ToolConfiguration:    cfg,
-		Scope:                cfg.Package.Cataloger.ScopeOpt,
+		DefaultScope:         scopeOption,
 		ProcessTasksInSerial: false,
-		CaptureFileMetadata:  cfg.FileMetadata.Cataloger.Enabled,
 		DigestHashes:         digests,
-		CaptureSecrets:       cfg.Secrets.Cataloger.Enabled,
-		SecretsConfig:        *secretsConfig,
-		SecretsScope:         cfg.Secrets.Cataloger.ScopeOpt,
-		ClassifyFiles:        cfg.FileClassification.Cataloger.Enabled,
+		SecretsSearch:        *secretsConfig,
+		SecretsScope:         secretsScopeOption,
 		FileClassifiers:      fileclassifier.DefaultClassifiers(),
-		ContentsConfig:       cfg.FileContents.ToConfig(),
+		ContentsSearch:       cfg.FileContents.ToConfig(),
 	}, nil
 }
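A hedged sketch of how the flattened viper keys line up after this change: "scope" is now a top-level key (previously package.cataloger.scope) and the secrets scope moves to "secrets.scope" (previously secrets.cataloger.scope). The literal default values below are illustrative placeholders except file-metadata.digests, which appears in this diff.

package main

import (
	"fmt"

	"github.com/spf13/viper"
)

func main() {
	v := viper.New()

	// defaults mirroring the ones set elsewhere in this diff; values are placeholders
	v.SetDefault("scope", "squashed")              // was package.cataloger.scope
	v.SetDefault("secrets.scope", "all-layers")    // was secrets.cataloger.scope
	v.SetDefault("file-metadata.digests", []string{"sha256"})

	fmt.Println(v.GetString("scope"), v.GetString("secrets.scope"), v.GetStringSlice("file-metadata.digests"))
}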

View File

@@ -1,29 +0,0 @@
package config
import (
"fmt"
"github.com/spf13/viper"
"github.com/anchore/syft/syft/source"
)
type catalogerOptions struct {
Enabled bool `yaml:"enabled" json:"enabled" mapstructure:"enabled"`
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
ScopeOpt source.Scope `yaml:"-" json:"-"`
}
func (cfg catalogerOptions) loadDefaultValues(v *viper.Viper) {
v.SetDefault("package.cataloger.enabled", true)
}
func (cfg *catalogerOptions) parseConfigValues() error {
scopeOption := source.ParseScope(cfg.Scope)
if scopeOption == source.UnknownScope {
return fmt.Errorf("bad scope value %q", cfg.Scope)
}
cfg.ScopeOpt = scopeOption
return nil
}

View File

@@ -1,19 +0,0 @@
package config
import (
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
type fileClassification struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
}
func (cfg fileClassification) loadDefaultValues(v *viper.Viper) {
v.SetDefault("file-classification.cataloger.enabled", catalogerEnabledDefault)
v.SetDefault("file-classification.cataloger.scope", source.SquashedScope)
}
func (cfg *fileClassification) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

View File

@@ -3,29 +3,21 @@ package config
 import (
 	"github.com/anchore/syft/syft/cataloger/files/filecontents"
 	"github.com/anchore/syft/syft/file"
-	"github.com/anchore/syft/syft/source"
 	"github.com/spf13/viper"
 )
 
 type fileContents struct {
-	Cataloger          catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
 	SkipFilesAboveSize int64    `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
 	Globs              []string `yaml:"globs" json:"globs" mapstructure:"globs"`
 }
 
 func (cfg fileContents) loadDefaultValues(v *viper.Viper) {
-	v.SetDefault("file-contents.cataloger.enabled", catalogerEnabledDefault)
-	v.SetDefault("file-contents.cataloger.scope", source.SquashedScope)
 	v.SetDefault("file-contents.skip-files-above-size", 1*file.MB)
 	v.SetDefault("file-contents.globs", []string{})
 }
 
-func (cfg *fileContents) parseConfigValues() error {
-	return cfg.Cataloger.parseConfigValues()
-}
-
-func (cfg fileContents) ToConfig() filecontents.CatalogerConfig {
-	return filecontents.CatalogerConfig{
+func (cfg fileContents) ToConfig() filecontents.Config {
+	return filecontents.Config{
 		Globs:                     cfg.Globs,
 		SkipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize,
 	}

View File

@@ -1,21 +1,17 @@
 package config
 
 import (
-	"github.com/anchore/syft/syft/source"
 	"github.com/spf13/viper"
 )
 
-type FileMetadata struct {
-	Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
-	Digests   []string         `yaml:"digests" json:"digests" mapstructure:"digests"`
+type fileMetadata struct {
+	Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"`
 }
 
-func (cfg FileMetadata) loadDefaultValues(v *viper.Viper) {
-	v.SetDefault("file-metadata.cataloger.enabled", catalogerEnabledDefault)
-	v.SetDefault("file-metadata.cataloger.scope", source.SquashedScope)
+func (cfg fileMetadata) loadDefaultValues(v *viper.Viper) {
 	v.SetDefault("file-metadata.digests", []string{"sha256"})
 }
 
-func (cfg *FileMetadata) parseConfigValues() error {
-	return cfg.Cataloger.parseConfigValues()
+func (cfg *fileMetadata) parseConfigValues() error {
+	return nil
 }

View File

@@ -6,22 +6,16 @@ import (
 )
 
 type pkg struct {
-	Cataloger               catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
 	SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"`
 	SearchIndexedArchives   bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"`
 }
 
 func (cfg pkg) loadDefaultValues(v *viper.Viper) {
-	cfg.Cataloger.loadDefaultValues(v)
-
 	c := packages.DefaultSearchConfig()
 	v.SetDefault("package.search-unindexed-archives", c.IncludeUnindexedArchives)
 	v.SetDefault("package.search-indexed-archives", c.IncludeIndexedArchives)
 }
 
-func (cfg *pkg) parseConfigValues() error {
-	return cfg.Cataloger.parseConfigValues()
-}
-
 func (cfg pkg) ToConfig() packages.SearchConfig {
 	return packages.SearchConfig{
 		IncludeIndexedArchives: cfg.SearchIndexedArchives,

View File

@@ -10,32 +10,27 @@ import (
 )
 
 type secretsCfg struct {
-	Cataloger           catalogerOptions  `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
 	AdditionalPatterns  map[string]string `yaml:"additional-patterns" json:"additional-patterns" mapstructure:"additional-patterns"`
 	ExcludePatternNames []string          `yaml:"exclude-pattern-names" json:"exclude-pattern-names" mapstructure:"exclude-pattern-names"`
 	RevealValues        bool              `yaml:"reveal-values" json:"reveal-values" mapstructure:"reveal-values"`
 	SkipFilesAboveSize  int64             `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
+	Scope               string            `yaml:"scope" json:"scope" mapstructure:"scope"`
 }
 
 func (cfg secretsCfg) loadDefaultValues(v *viper.Viper) {
-	v.SetDefault("secrets.cataloger.enabled", catalogerEnabledDefault)
-	v.SetDefault("secrets.cataloger.scope", source.AllLayersScope)
+	v.SetDefault("secrets.scope", source.AllLayersScope)
 	v.SetDefault("secrets.reveal-values", false)
 	v.SetDefault("secrets.skip-files-above-size", 1*file.MB)
 	v.SetDefault("secrets.additional-patterns", map[string]string{})
 	v.SetDefault("secrets.exclude-pattern-names", []string{})
 }
 
-func (cfg *secretsCfg) parseConfigValues() error {
-	return cfg.Cataloger.parseConfigValues()
-}
-
-func (cfg secretsCfg) ToConfig() (*secrets.CatalogerConfig, error) {
+func (cfg secretsCfg) ToConfig() (*secrets.Config, error) {
 	patterns, err := file.GenerateSearchPatterns(secrets.DefaultSecretsPatterns, cfg.AdditionalPatterns, cfg.ExcludePatternNames)
 	if err != nil {
 		return nil, fmt.Errorf("unable to process secrets config patterns: %w", err)
 	}
-	return &secrets.CatalogerConfig{
+	return &secrets.Config{
 		Patterns:     patterns,
 		RevealValues: cfg.RevealValues,
 		MaxFileSize:  cfg.SkipFilesAboveSize,

View File

@@ -49,7 +49,7 @@ func toSyftModel(bom *cyclonedx.BOM) (*sbom.SBOM, error) {
 	}
 	s := &sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog:    pkg.NewCollection(),
+			Packages:          pkg.NewCollection(),
 			LinuxDistribution: linuxReleaseFromComponents(*bom.Components),
 		},
 		Source: meta,
@@ -86,7 +86,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
 		idMap[component.BOMRef] = p
 		// TODO there must be a better way than needing to call this manually:
 		p.SetID()
-		s.Artifacts.PackageCatalog.Add(*p)
+		s.Artifacts.Packages.Add(*p)
 	}
 
 	if component.Components != nil {

View File

@@ -212,7 +212,7 @@ func Test_decode(t *testing.T) {
 				assert.Equal(t, e.ver, sbom.Artifacts.LinuxDistribution.VersionID)
 			}
 			if e.pkg != "" {
-				for p := range sbom.Artifacts.PackageCatalog.Enumerate() {
+				for p := range sbom.Artifacts.Packages.Enumerate() {
 					if e.pkg != p.Name {
 						continue
 					}
@@ -240,7 +240,7 @@ func Test_decode(t *testing.T) {
 			if e.relation != "" {
 				foundRelation := false
 				for _, r := range sbom.Relationships {
-					p := sbom.Artifacts.PackageCatalog.Package(r.To.ID())
+					p := sbom.Artifacts.Packages.Package(r.To.ID())
 					if e.relation == p.Name {
 						foundRelation = true
 						break

View File

@@ -25,7 +25,7 @@ func ToFormatModel(s sbom.SBOM) *cyclonedx.BOM {
 	cdxBOM.SerialNumber = uuid.New().URN()
 	cdxBOM.Metadata = toBomDescriptor(internal.ApplicationName, versionInfo.Version, s.Source)
 
-	packages := s.Artifacts.PackageCatalog.Sorted()
+	packages := s.Artifacts.Packages.Sorted()
 	components := make([]cyclonedx.Component, len(packages))
 	for i, p := range packages {
 		components[i] = encodeComponent(p)

View File

@@ -21,7 +21,7 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) {
 	s := &sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog:    pkg.NewCollection(),
+			Packages:          pkg.NewCollection(),
 			FileMetadata:      map[file.Coordinates]file.Metadata{},
 			FileDigests:       map[file.Coordinates][]file.Digest{},
 			LinuxDistribution: findLinuxReleaseByPURL(doc),
@@ -74,7 +74,7 @@ func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *sp
 	for _, p := range doc.Packages {
 		syftPkg := toSyftPackage(p)
 		spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg
-		s.Artifacts.PackageCatalog.Add(*syftPkg)
+		s.Artifacts.Packages.Add(*syftPkg)
 	}
 }

View File

@@ -94,7 +94,7 @@ func TestToSyftModel(t *testing.T) {
 	assert.NotNil(t, sbom)
 
-	pkgs := sbom.Artifacts.PackageCatalog.Sorted()
+	pkgs := sbom.Artifacts.Packages.Sorted()
 
 	assert.Len(t, pkgs, 2)

View File

@@ -124,7 +124,7 @@ func ImageInput(t testing.TB, testImage string, options ...ImageOption) sbom.SBO
 	return sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog: catalog,
+			Packages: catalog,
 			LinuxDistribution: &linux.Release{
 				PrettyName: "debian",
 				Name:       "debian",
@@ -152,7 +152,7 @@ func carriageRedactor(s []byte) []byte {
 	return []byte(msg)
 }
 
-func populateImageCatalog(catalog *pkg.Collection, img *image.Image) {
+func populateImageCatalog(catalog pkg.Collection, img *image.Image) {
 	_, ref1, _ := img.SquashedTree().File("/somefile-1.txt", filetree.FollowBasenameLinks)
 	_, ref2, _ := img.SquashedTree().File("/somefile-2.txt", filetree.FollowBasenameLinks)
@@ -205,7 +205,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM {
 	return sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog: catalog,
+			Packages: catalog,
 			LinuxDistribution: &linux.Release{
 				PrettyName: "debian",
 				Name:       "debian",
@@ -228,7 +228,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM {
 	}
 }
 
-func newDirectoryCatalog() *pkg.Collection {
+func newDirectoryCatalog() pkg.Collection {
 	catalog := pkg.NewCollection()
 
 	// populate catalog with test data

View File

@@ -57,7 +57,7 @@ func Test_decodeJSON(t *testing.T) {
 			split = strings.SplitN(pkg, ":", 2)
 			name = split[0]
 			version = split[1]
-			for p := range bom.Artifacts.PackageCatalog.Enumerate() {
+			for p := range bom.Artifacts.Packages.Enumerate() {
 				if p.Name == name {
 					assert.Equal(t, version, p.Version)
 					continue pkgs

View File

@@ -57,7 +57,7 @@ func Test_decodeXML(t *testing.T) {
 			split = strings.SplitN(pkg, ":", 2)
 			name = split[0]
 			version = split[1]
-			for p := range bom.Artifacts.PackageCatalog.Enumerate() {
+			for p := range bom.Artifacts.Packages.Enumerate() {
 				if p.Name == name {
 					assert.Equal(t, version, p.Version)
 					continue pkgs

View File

@@ -71,11 +71,11 @@ func TestSPDXJSONDecoder(t *testing.T) {
 			}
 
 			if test.packages != nil {
-				assert.Equal(t, sbom.Artifacts.PackageCatalog.PackageCount(), len(test.packages))
+				assert.Equal(t, sbom.Artifacts.Packages.Size(), len(test.packages))
 
 			packages:
 				for _, pkgName := range test.packages {
-					for _, p := range sbom.Artifacts.PackageCatalog.Sorted() {
+					for _, p := range sbom.Artifacts.Packages.Sorted() {
 						if p.Name == pkgName {
 							continue packages
 						}

View File

@@ -42,13 +42,13 @@ func toFormatModel(s sbom.SBOM) (*model.Document, error) {
 		},
 		DataLicense:       "CC0-1.0",
 		DocumentNamespace: namespace,
-		Packages:          toPackages(s.Artifacts.PackageCatalog, s.Relationships),
+		Packages:          toPackages(s.Artifacts.Packages, s.Relationships),
 		Files:             toFiles(s),
 		Relationships:     toRelationships(s.Relationships),
 	}, nil
 }
 
-func toPackages(catalog *pkg.Collection, relationships []artifact.Relationship) []model.Package {
+func toPackages(catalog pkg.Collection, relationships []artifact.Relationship) []model.Package {
 	packages := make([]model.Package, 0)
 
 	for _, p := range catalog.Sorted() {

View File

@@ -85,13 +85,13 @@ func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) {
 			// Cardinality: optional, one
 			DocumentComment: "",
 		},
-		Packages: toFormatPackages(s.Artifacts.PackageCatalog),
+		Packages: toFormatPackages(s.Artifacts.Packages),
 	}, nil
 }
 
 // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/)
 // nolint: funlen
-func toFormatPackages(catalog *pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 {
+func toFormatPackages(catalog pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 {
 	results := make(map[spdx.ElementID]*spdx.Package2_2)
 
 	for _, p := range catalog.Sorted() {

View File

@@ -28,8 +28,8 @@ func TestEncodeDecodeCycle(t *testing.T) {
 				t.Errorf("metadata difference: %+v", d)
 			}
 
-			actualPackages := actualSBOM.Artifacts.PackageCatalog.Sorted()
-			for idx, p := range originalSBOM.Artifacts.PackageCatalog.Sorted() {
+			actualPackages := actualSBOM.Artifacts.Packages.Sorted()
+			for idx, p := range originalSBOM.Artifacts.Packages.Sorted() {
 				if !assert.Equal(t, p.Name, actualPackages[idx].Name) {
 					t.Errorf("different package at idx=%d: %s vs %s", idx, p.Name, actualPackages[idx].Name)
 					continue

View File

@@ -95,7 +95,7 @@ func TestEncodeFullJSONDocument(t *testing.T) {
 	s := sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog: catalog,
+			Packages: catalog,
 			FileMetadata: map[file.Coordinates]file.Metadata{
 				file.NewLocation("/a/place").Coordinates: {
 					Mode: 0775,

View File

@@ -31,7 +31,7 @@ func ToFormatModel(s sbom.SBOM) model.Document {
 	}
 
 	return model.Document{
-		Artifacts:             toPackageModels(s.Artifacts.PackageCatalog),
+		Artifacts:             toPackageModels(s.Artifacts.Packages),
 		ArtifactRelationships: toRelationshipModel(s.Relationships),
 		Files:                 toFile(s),
 		Secrets:               toSecrets(s.Artifacts.Secrets),
@@ -153,7 +153,7 @@ func toFileMetadataEntry(coordinates file.Coordinates, metadata *file.Metadata)
 	}
 }
 
-func toPackageModels(catalog *pkg.Collection) []model.Package {
+func toPackageModels(catalog pkg.Collection) []model.Package {
 	artifacts := make([]model.Package, 0)
 	if catalog == nil {
 		return artifacts

View File

@@ -18,7 +18,7 @@ func toSyftModel(doc model.Document) (*sbom.SBOM, error) {
 	return &sbom.SBOM{
 		Artifacts: sbom.Artifacts{
-			PackageCatalog:    catalog,
+			Packages:          catalog,
 			LinuxDistribution: toSyftLinuxRelease(doc.Distro),
 		},
 		Source: *toSyftSourceData(doc.Source),
@@ -48,7 +48,7 @@ func toSyftLinuxRelease(d model.LinuxRelease) *linux.Release {
 	}
 }
 
-func toSyftRelationships(doc *model.Document, catalog *pkg.Collection, relationships []model.Relationship) []artifact.Relationship {
+func toSyftRelationships(doc *model.Document, catalog pkg.Collection, relationships []model.Relationship) []artifact.Relationship {
 	idMap := make(map[string]interface{})
 
 	for _, p := range catalog.Sorted() {
@@ -130,7 +130,7 @@ func toSyftSourceData(s model.Source) *source.Metadata {
 	return nil
 }
 
-func toSyftCatalog(pkgs []model.Package) *pkg.Collection {
+func toSyftCatalog(pkgs []model.Package) pkg.Collection {
 	catalog := pkg.NewCollection()
 	for _, p := range pkgs {
 		catalog.Add(toSyftPackage(p))

View File

@@ -15,7 +15,7 @@ func encoder(output io.Writer, s sbom.SBOM) error {
 	var rows [][]string
 
 	columns := []string{"Name", "Version", "Type"}
-	for _, p := range s.Artifacts.PackageCatalog.Sorted() {
+	for _, p := range s.Artifacts.Packages.Sorted() {
 		row := []string{
 			p.Name,
 			p.Version,

View File

@@ -35,7 +35,7 @@ func encoder(output io.Writer, s sbom.SBOM) error {
 	// populate artifacts...
 	rows := 0
 
-	for _, p := range s.Artifacts.PackageCatalog.Sorted() {
+	for _, p := range s.Artifacts.Packages.Sorted() {
 		fmt.Fprintf(w, "[%s]\n", p.Name)
 		fmt.Fprintln(w, "  Version:\t", p.Version)
 		fmt.Fprintln(w, "  Type:\t", string(p.Type))

View File

@@ -2,13 +2,28 @@ package syft
 import (
 	"fmt"
 
+	"github.com/anchore/syft/internal/bus"
 	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/event"
+	"github.com/anchore/syft/syft/event/monitor"
+	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/sbom"
 	"github.com/anchore/syft/syft/source"
 	"github.com/hashicorp/go-multierror"
+	"github.com/wagoodman/go-partybus"
+	"github.com/wagoodman/go-progress"
 )
 
+type monitorableCollection struct {
+	pkg.Collection
+	monitor *progress.Manual
+}
+
+func (m *monitorableCollection) Add(p pkg.Package) {
+	m.monitor.N++
+	m.Collection.Add(p)
+}
+
 func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error) {
 	var config = DefaultCatalogingConfig()
 	for _, optFn := range options {
@@ -17,28 +32,60 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error
 		}
 	}
 
-	var tasks []task
-
-	generators := []taskGenerator{
-		generatePackagesCatalogingTask,
-		generateFileMetadataCatalogingTask,
-		generateFileDigestsCatalogingTask,
-		generateSecretsCatalogingTask,
-		generateFileClassifierTask,
-		generateContentsCatalogingTask,
-	}
-
-	for _, generator := range generators {
-		t, err := generator(config)
-		if err != nil {
-			return nil, fmt.Errorf("unable to create cataloging task: %w", err)
-		}
-
-		if t != nil {
-			tasks = append(tasks, t)
-		}
-	}
+	if config.availableTasks == nil {
+		config.availableTasks = newTaskCollection()
+	}
+
+	tc := config.availableTasks
+	if err := tc.addAllCatalogers(config); err != nil {
+		return nil, fmt.Errorf("unable to register catalogers: %w", err)
+	}
+
+	var catalogingTasks []task
+
+	if len(config.EnabledCatalogers) == 0 {
+		switch src.Metadata.Scheme {
+		case source.ImageType:
+			catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel, installedTaskLabel)...)
+		case source.FileType:
+			catalogingTasks = tc.tasks(tc.all()...)
+		case source.DirectoryType:
+			// TODO: it looks like gemspec was left out on main, is this intentional? if so it's not accounted for here...
+			catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel)...)
+		}
+	}
+
+	if len(catalogingTasks) == 0 {
+		return nil, fmt.Errorf("no cataloging tasks configured to run")
+	}
+
+	// special case: we need to identify the linux distro for downstream processing
+	identifyLinuxDistroTask, err := newIdentifyDistroTask(config)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create linux distro identification task: %+v", err)
+	}
+
+	synthesizePackageRelationshipsTask, err := newSynthesizePackageRelationshipsTasks(config)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create task to synthesize package relationships: %+v", err)
+	}
+
+	taskGroups := [][]task{
+		{
+			identifyLinuxDistroTask,
+		},
+		catalogingTasks,
+		{
+			synthesizePackageRelationshipsTask,
+		},
+	}
+
+	files, pkgs := newCatalogerMonitor()
+	defer func() {
+		files.SetCompleted() // TODO: files monitor is unused... should we remove?
+		pkgs.SetCompleted()
+	}()
 
 	s := sbom.SBOM{
 		Source: src.Metadata,
 		Descriptor: sbom.Descriptor{
@@ -46,12 +93,39 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error
 			Version:       config.ToolVersion,
 			Configuration: config.ToolConfiguration,
 		},
+		Artifacts: sbom.Artifacts{
+			Packages: &monitorableCollection{
+				Collection: pkg.NewCollection(),
+				monitor:    pkgs,
+			},
+		},
 	}
 
-	return &s, runTasks(&s, src, tasks, config.ProcessTasksInSerial)
+	for _, tasks := range taskGroups {
+		if err := runTasks(&s, src, config.ProcessTasksInSerial, tasks...); err != nil {
+			return &s, err
+		}
+	}
+
+	return &s, nil
 }
 
-func runTasks(s *sbom.SBOM, src *source.Source, tasks []task, serial bool) error {
+// newCatalogerMonitor creates a new CatalogingMonitor object and publishes the object on the bus as a CatalogingStarted event.
+func newCatalogerMonitor() (*progress.Manual, *progress.Manual) {
+	filesProcessed := progress.Manual{}
+	packagesDiscovered := progress.Manual{}
+	bus.Publish(partybus.Event{
+		Type: event.CatalogingStarted,
+		Value: monitor.CatalogingMonitor{
+			FilesProcessed:     progress.Monitorable(&filesProcessed),
+			PackagesDiscovered: progress.Monitorable(&packagesDiscovered),
+		},
+	})
+	return &filesProcessed, &packagesDiscovered
+}
+
+func runTasks(s *sbom.SBOM, src *source.Source, serial bool, tasks ...task) error {
 	var relationships []<-chan artifact.Relationship
 	var errs = make(chan error)
 	for _, t := range tasks {
@@ -92,7 +166,7 @@ func mergeErrors(errs <-chan error) (allErrs error) {
 func runTask(t task, a *sbom.Artifacts, src *source.Source, r chan<- artifact.Relationship, errs chan<- error) {
 	defer close(r)
 
-	relationships, err := t(a, src)
+	relationships, err := t.Run(a, src)
 	if err != nil {
 		errs <- err
 		return
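A hypothetical illustration only: the taskCollection/withLabels implementation is not part of this excerpt, and the label names here are borrowed from the identifiers used above (packageTaskLabel, installedTaskLabel). The sketch shows the kind of label-based selection the Catalog function relies on when picking cataloging tasks per source scheme; the "every requested label must match" semantics are an assumption.

package main

import "fmt"

// task and withAllLabels are stand-ins for the real taskCollection types.
type task struct {
	id     string
	labels map[string]bool
}

// withAllLabels returns the ids of tasks carrying every requested label.
func withAllLabels(all []task, labels ...string) []string {
	var ids []string
	for _, t := range all {
		match := true
		for _, l := range labels {
			if !t.labels[l] {
				match = false
				break
			}
		}
		if match {
			ids = append(ids, t.id)
		}
	}
	return ids
}

func main() {
	tasks := []task{
		{id: "os-apkdb", labels: map[string]bool{"package": true, "installed": true}},
		{id: "python-requirements", labels: map[string]bool{"package": true, "index": true}},
		{id: "secrets", labels: map[string]bool{"file": true}},
	}
	// an image source would select only installed package catalogers
	fmt.Println(withAllLabels(tasks, "package", "installed")) // [os-apkdb]
}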

View File

@@ -11,23 +11,23 @@ import (
 	"github.com/anchore/syft/syft/file"
 )
 
-type CatalogerConfig struct {
+type Config struct {
 	Globs                     []string
 	SkipFilesAboveSizeInBytes int64
 }
 
 type Cataloger struct {
-	config CatalogerConfig
+	config Config
 }
 
-func DefaultCatalogerConfig() CatalogerConfig {
-	return CatalogerConfig{
+func DefaultConfig() Config {
+	return Config{
 		Globs:                     nil,
 		SkipFilesAboveSizeInBytes: 1 * file.MB,
 	}
 }
 
-func NewCataloger(config CatalogerConfig) (*Cataloger, error) {
+func NewCataloger(config Config) (*Cataloger, error) {
 	return &Cataloger{
 		config: config,
 	}, nil

View File

@@ -66,7 +66,7 @@ func TestContentsCataloger(t *testing.T) {
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			c, err := NewCataloger(CatalogerConfig{
+			c, err := NewCataloger(Config{
 				Globs:                     test.globs,
 				SkipFilesAboveSizeInBytes: test.maxSize,
 			})

View File

@@ -27,30 +27,30 @@ var DefaultSecretsPatterns = map[string]string{
 	"generic-api-key": `(?i)api(-|_)?key["'=:\s]*?(?P<value>[A-Z0-9]{20,60})["']?(\s|$)`,
 }
 
-type CatalogerConfig struct {
+type Config struct {
 	Patterns     map[string]*regexp.Regexp
 	RevealValues bool
 	MaxFileSize  int64
 }
 
 type Cataloger struct {
-	config CatalogerConfig
+	config Config
 }
 
-func DefaultCatalogerConfig() CatalogerConfig {
+func DefaultConfig() Config {
 	patterns, err := file.GenerateSearchPatterns(DefaultSecretsPatterns, nil, nil)
 	if err != nil {
 		patterns = make(map[string]*regexp.Regexp)
 		log.Errorf("unable to create default secrets config: %w", err)
 	}
 
-	return CatalogerConfig{
+	return Config{
 		Patterns:     patterns,
 		RevealValues: false,
 		MaxFileSize:  1 * file.MB,
 	}
 }
 
-func NewCataloger(config CatalogerConfig) (*Cataloger, error) {
+func NewCataloger(config Config) (*Cataloger, error) {
 	return &Cataloger{
 		config: config,
 	}, nil

View File

@@ -174,7 +174,7 @@ func TestSecretsCataloger(t *testing.T) {
 				regexObjs[name] = obj
 			}
 
-			c, err := NewCataloger(CatalogerConfig{
+			c, err := NewCataloger(Config{
 				Patterns:     regexObjs,
 				RevealValues: test.reveal,
 				MaxFileSize:  test.maxSize,
@@ -420,7 +420,7 @@ j4f668YfhUbKdRF6S6734856
 	for _, test := range tests {
 		t.Run(test.fixture, func(t *testing.T) {
-			c, err := NewCataloger(CatalogerConfig{
+			c, err := NewCataloger(Config{
 				Patterns:     regexObjs,
 				RevealValues: true,
 				MaxFileSize:  10 * file.MB,

syft/cataloger/id.go (new file, 40 lines)
View File

@@ -0,0 +1,40 @@
package cataloger
const (
ApkDBID ID = "os-apkdb"
DpkgID ID = "os-dpkg"
RpmDBID ID = "os-rpmdb"
RubyGemspecID ID = "ruby-gem-spec"
RubyGemfileLockID ID = "ruby-gem-file-lock"
PythonPackageID ID = "python-package"
PythonRequirementsID ID = "python-requirements"
PythonPoetryID ID = "python-poetry"
PythonSetupID ID = "python-setup"
PythonPipFileID ID = "python-pipfile"
JavascriptPackageJSONID ID = "javascript-package-json"
JavascriptPackageLockID ID = "javascript-package-lock"
JavaScriptYarnLockID ID = "javascript-yarn-lock"
JavaArchiveID ID = "java-archive"
GoModID ID = "go-mod"
GoBinaryID ID = "go-binary"
RustCargoLockID ID = "rust-cargo-lock"
PHPInstalledJSONID ID = "php-installed-json"
PHPComposerLockID ID = "php-composer-lock"
FileMetadataID ID = "file-metadata"
FileDigestsID ID = "file-digest"
SecretsID ID = "secrets"
FileClassifierID ID = "file-classifier"
FileContentsID ID = "file-content"
)
type ID string
type IDs []ID
func (c IDs) Len() int { return len(c) }
func (c IDs) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c IDs) Less(i, j int) bool {
return c[i] < c[j]
}
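Since the IDs type above implements sort.Interface, a set of cataloger IDs can be ordered deterministically (useful for stable output or config listings). A minimal sketch; sortCatalogerIDs is a hypothetical helper, not part of this commit.

package cataloger

import "sort"

// sortCatalogerIDs returns a stable ordering of the given cataloger IDs,
// relying on the Len/Swap/Less methods defined on IDs above.
func sortCatalogerIDs(ids ...ID) IDs {
	sorted := append(IDs{}, ids...)
	sort.Sort(sorted)
	return sorted
}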

View File

@@ -1,126 +0,0 @@
package packages
import (
"fmt"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/speculate/cpes"
"github.com/hashicorp/go-multierror"
"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"
)
// Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages.
// In order to efficiently retrieve contents from an underlying container image the content fetch requests are
// done in bulk. Specifically, all files of interest are collected from each cataloger and accumulated into a single
// request.
func Catalog(resolver file.Resolver, release *linux.Release, catalogers ...pkg.Cataloger) (*pkg.Collection, []artifact.Relationship, error) {
catalog := pkg.NewCollection()
var allRelationships []artifact.Relationship
filesProcessed, packagesDiscovered := newPackageCatalogerMonitor()
// perform analysis, accumulating errors for each failed analysis
var errs error
for _, c := range catalogers {
// find packages from the underlying raw data
log.Debugf("cataloging with %q", c.Name())
packages, relationships, err := c.Catalog(resolver)
if err != nil {
errs = multierror.Append(errs, err)
continue
}
catalogedPackages := len(packages)
log.Debugf("discovered %d packages", catalogedPackages)
packagesDiscovered.N += int64(catalogedPackages)
for _, p := range packages {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpes.Generate(p)
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(p, release)
// create file-to-package relationships for files owned by the package
owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
if err != nil {
log.Warnf("unable to create any package-file relationships for package name=%q: %w", p.Name, err)
} else {
allRelationships = append(allRelationships, owningRelationships...)
}
// add to catalog
catalog.Add(p)
}
allRelationships = append(allRelationships, relationships...)
}
allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...)
if errs != nil {
return nil, nil, errs
}
filesProcessed.SetCompleted()
packagesDiscovered.SetCompleted()
return catalog, allRelationships, nil
}
func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
fileOwner, ok := p.Metadata.(pkg.FileOwner)
if !ok {
return nil, nil
}
var relationships []artifact.Relationship
for _, path := range fileOwner.OwnedFiles() {
locations, err := resolver.FilesByPath(path)
if err != nil {
return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err)
}
if len(locations) == 0 {
// ideally we want to warn users about missing files from a package, however, it is very common for
// container image authors to delete files that are not needed in order to keep image sizes small. Adding
// a warning here would be needlessly noisy (even for popular base images).
continue
}
for _, l := range locations {
relationships = append(relationships, artifact.Relationship{
From: p,
To: l.Coordinates,
Type: artifact.ContainsRelationship,
})
}
}
return relationships, nil
}
// newPackageCatalogerMonitor creates a new PackageCatalogerMonitor object and publishes the object on the bus as a PackageCatalogerStarted event.
func newPackageCatalogerMonitor() (*progress.Manual, *progress.Manual) {
filesProcessed := progress.Manual{}
packagesDiscovered := progress.Manual{}
bus.Publish(partybus.Event{
Type: event.PackageCatalogerStarted,
Value: monitor.PackageCatalogerMonitor{
FilesProcessed: progress.Monitorable(&filesProcessed),
PackagesDiscovered: progress.Monitorable(&packagesDiscovered),
},
})
return &filesProcessed, &packagesDiscovered
}

View File

@@ -1,82 +0,0 @@
package packages
import (
"github.com/anchore/syft/syft/cataloger/packages/apkdb"
"github.com/anchore/syft/syft/cataloger/packages/deb"
"github.com/anchore/syft/syft/cataloger/packages/golang"
"github.com/anchore/syft/syft/cataloger/packages/java"
"github.com/anchore/syft/syft/cataloger/packages/javascript"
"github.com/anchore/syft/syft/cataloger/packages/php"
"github.com/anchore/syft/syft/cataloger/packages/python"
"github.com/anchore/syft/syft/cataloger/packages/rpmdb"
"github.com/anchore/syft/syft/cataloger/packages/ruby"
"github.com/anchore/syft/syft/cataloger/packages/rust"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// TODO: add tag-based API to select appropriate package catalogers for different scenarios
// AllCatalogers returns all implemented package catalogers
func AllCatalogers(cfg SearchConfig) []pkg.Cataloger {
return []pkg.Cataloger{
ruby.NewGemFileLockCataloger(),
ruby.NewGemSpecCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonPackageCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewJavascriptPackageCataloger(),
deb.NewDpkgdbCataloger(),
rpmdb.NewRpmdbCataloger(),
java.NewJavaCataloger(cfg.Java()),
apkdb.NewApkdbCataloger(),
golang.NewGoModuleBinaryCataloger(),
golang.NewGoModFileCataloger(),
rust.NewCargoLockCataloger(),
}
}
// InstalledCatalogers returns a slice of locally implemented package catalogers that are fit for detecting installations of packages.
func InstalledCatalogers(cfg SearchConfig) []pkg.Cataloger {
return []pkg.Cataloger{
ruby.NewGemSpecCataloger(),
python.NewPythonPackageCataloger(),
php.NewPHPComposerInstalledCataloger(),
javascript.NewJavascriptPackageCataloger(),
deb.NewDpkgdbCataloger(),
rpmdb.NewRpmdbCataloger(),
java.NewJavaCataloger(cfg.Java()),
apkdb.NewApkdbCataloger(),
golang.NewGoModuleBinaryCataloger(),
}
}
// IndexCatalogers returns a slice of locally implemented package catalogers that are fit for detecting packages from index files (and select installations)
func IndexCatalogers(cfg SearchConfig) []pkg.Cataloger {
return []pkg.Cataloger{
ruby.NewGemFileLockCataloger(),
python.NewPythonIndexCataloger(),
python.NewPythonPackageCataloger(), // for install
php.NewPHPComposerLockCataloger(),
javascript.NewJavascriptLockCataloger(),
deb.NewDpkgdbCataloger(), // for install
rpmdb.NewRpmdbCataloger(), // for install
java.NewJavaCataloger(cfg.Java()), // for install
apkdb.NewApkdbCataloger(), // for install
golang.NewGoModuleBinaryCataloger(), // for install
golang.NewGoModFileCataloger(),
rust.NewCargoLockCataloger(),
}
}
func CatalogersBySourceScheme(scheme source.Type, cfg SearchConfig) []pkg.Cataloger {
switch scheme {
case source.ImageType:
return InstalledCatalogers(cfg)
case source.FileType:
return AllCatalogers(cfg)
case source.DirectoryType:
return IndexCatalogers(cfg)
}
return nil
}

View File

@@ -0,0 +1,24 @@
package packages
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func FindRelationships(catalog pkg.Collection, resolver file.Resolver) []artifact.Relationship {
var allRelationships []artifact.Relationship
for p := range catalog.Enumerate() {
relationships, err := createFileOwnershipRelationships(p, resolver)
if err != nil {
log.Warnf("unable to create any package-file relationships for package name=%q: %w", p.Name, err)
continue
}
allRelationships = append(allRelationships, relationships...)
}
allRelationships = append(allRelationships, findOwnershipByFileOverlapRelationship(catalog)...)
return allRelationships
}

View File

@@ -13,15 +13,22 @@ func NewJavascriptPackageCataloger() *generic.Cataloger {
 		"**/package.json": parsePackageJSON,
 	}
 
-	return generic.NewCataloger(nil, globParsers, "javascript-package-cataloger")
+	return generic.NewCataloger(nil, globParsers, "javascript-package-json-cataloger")
 }
 
 // NewJavascriptLockCataloger returns a new Javascript cataloger object base on package lock files.
-func NewJavascriptLockCataloger() *generic.Cataloger {
+func NewJavascriptPackageLockCataloger() *generic.Cataloger {
 	globParsers := map[string]generic.Parser{
 		"**/package-lock.json": parsePackageLock,
-		"**/yarn.lock":         parseYarnLock,
 	}
 
-	return generic.NewCataloger(nil, globParsers, "javascript-lock-cataloger")
+	return generic.NewCataloger(nil, globParsers, "javascript-package-lock-cataloger")
+}
+
+func NewJavascriptYarnLockCataloger() *generic.Cataloger {
+	globParsers := map[string]generic.Parser{
+		"**/yarn.lock": parseYarnLock,
+	}
+
+	return generic.NewCataloger(nil, globParsers, "javascript-yarn-lock-cataloger")
 }
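With the lock-file cataloger split apart, a caller that previously used NewJavascriptLockCataloger would now register the package-lock and yarn-lock catalogers separately. A hedged sketch; the constructor names come from this diff, but the registration site itself is not shown in this excerpt.

package example

import (
	"github.com/anchore/syft/syft/cataloger/packages/javascript"
	"github.com/anchore/syft/syft/pkg"
)

// javascriptCatalogers groups the split javascript catalogers for registration.
func javascriptCatalogers() []pkg.Cataloger {
	return []pkg.Cataloger{
		javascript.NewJavascriptPackageCataloger(),
		javascript.NewJavascriptPackageLockCataloger(),
		javascript.NewJavascriptYarnLockCataloger(),
	}
}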

View File

@@ -8,13 +8,34 @@ import (
 )
 
 // NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
-func NewPythonIndexCataloger() *generic.Cataloger {
+func NewPythonRequirementsCataloger() *generic.Cataloger {
 	globParsers := map[string]generic.Parser{
 		"**/*requirements*.txt": parseRequirementsTxt,
-		"**/poetry.lock":        parsePoetryLock,
-		"**/Pipfile.lock":       parsePipfileLock,
-		"**/setup.py":           parseSetup,
 	}
 
-	return generic.NewCataloger(nil, globParsers, "python-index-cataloger")
+	return generic.NewCataloger(nil, globParsers, "python-requirements-cataloger")
+}
+
+func NewPythonPoetryCataloger() *generic.Cataloger {
+	globParsers := map[string]generic.Parser{
+		"**/poetry.lock": parsePoetryLock,
+	}
+
+	return generic.NewCataloger(nil, globParsers, "python-poetry-cataloger")
+}
+
+func NewPythonPipfileCataloger() *generic.Cataloger {
+	globParsers := map[string]generic.Parser{
+		"**/Pipfile.lock": parsePipfileLock,
+	}
+
+	return generic.NewCataloger(nil, globParsers, "python-pipfile-cataloger")
+}
+
+func NewPythonSetupCataloger() *generic.Cataloger {
+	globParsers := map[string]generic.Parser{
+		"**/setup.py": parseSetup,
+	}
+
+	return generic.NewCataloger(nil, globParsers, "python-setup-cataloger")
 }

View File

@@ -1,8 +1,11 @@
-package pkg
+package packages
 
 import (
+	"fmt"
+
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
 	"github.com/bmatcuk/doublestar/v4"
 	"github.com/scylladb/go-set/strset"
 )
@@ -10,9 +13,9 @@ import (
 var globsForbiddenFromBeingOwned = []string{
 	// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
 	// RPM DB, so if not ignored that package would own all other packages on the system).
-	ApkDBGlob,
-	DpkgDBGlob,
-	RpmDBGlob,
+	pkg.ApkDBGlob,
+	pkg.DpkgDBGlob,
+	pkg.RpmDBGlob,
 	// DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership.
 	"/usr/share/doc/**/copyright",
 }
@@ -21,17 +24,50 @@ type ownershipByFilesMetadata struct {
 	Files []string `json:"files"`
 }
 
-// RelationshipsByFileOwnership creates a package-to-package relationship based on discovering which packages have
+func createFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
+	fileOwner, ok := p.Metadata.(pkg.FileOwner)
+	if !ok {
+		return nil, nil
+	}
+
+	var relationships []artifact.Relationship
+
+	for _, path := range fileOwner.OwnedFiles() {
+		locations, err := resolver.FilesByPath(path)
+		if err != nil {
+			return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err)
+		}
+
+		if len(locations) == 0 {
+			// ideally we want to warn users about missing files from a package, however, it is very common for
+			// container image authors to delete files that are not needed in order to keep image sizes small. Adding
+			// a warning here would be needlessly noisy (even for popular base images).
+			continue
+		}
+
+		for _, l := range locations {
+			relationships = append(relationships, artifact.Relationship{
+				From: p,
+				To:   l.Coordinates,
+				Type: artifact.ContainsRelationship,
+			})
+		}
+	}
+	return relationships, nil
+}
+
+// findOwnershipByFileOverlapRelationship creates a package-to-package relationship based on discovering which packages have
 // evidence locations that overlap with ownership claim from another package's package manager metadata.
-func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship {
-	var relationships = findOwnershipByFilesRelationships(catalog)
+func findOwnershipByFileOverlapRelationship(catalog pkg.Collection) []artifact.Relationship {
+	var relationships = findFilesWithDisputedOwnership(catalog)
 
 	var edges []artifact.Relationship
 	for parentID, children := range relationships {
 		for childID, files := range children {
 			edges = append(edges, artifact.Relationship{
-				From: catalog.byID[parentID],
-				To:   catalog.byID[childID],
+				From: catalog.Package(parentID),
+				To:   catalog.Package(childID),
 				Type: artifact.OwnershipByFileOverlapRelationship,
 				Data: ownershipByFilesMetadata{
 					Files: files.List(),
@@ -43,9 +79,9 @@ func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship {
 	return edges
 }
 
-// findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
+// findFilesWithDisputedOwnership find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
 // a package is found to be owned by another (from the owner's .Metadata.Files[]).
-func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
+func findFilesWithDisputedOwnership(catalog pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
 	var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
 
 	if catalog == nil {
@@ -59,7 +95,7 @@ func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[
 			}
 
 			// check to see if this is a file owner
-			pkgFileOwner, ok := candidateOwnerPkg.Metadata.(FileOwner)
+			pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner)
 			if !ok {
 				continue
 			}
View File
@ -1,7 +1,8 @@
package pkg package packages
import ( import (
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -12,20 +13,20 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
setup func(t testing.TB) ([]Package, []artifact.Relationship) setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship)
}{ }{
{ {
name: "owns-by-real-path", name: "owns-by-real-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := Package{ parent := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/another/path"), file.NewVirtualLocation("/a/path", "/another/path"),
file.NewVirtualLocation("/b/path", "/bee/path"), file.NewVirtualLocation("/b/path", "/bee/path"),
}, },
Type: RpmPkg, Type: pkg.RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: pkg.RpmdbMetadata{
Files: []RpmdbFileRecord{ Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"}, {Path: "/owning/path/1"},
{Path: "/owning/path/2"}, {Path: "/owning/path/2"},
{Path: "/d/path"}, {Path: "/d/path"},
@ -34,12 +35,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
} }
parent.SetID() parent.SetID()
child := Package{ child := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"), file.NewVirtualLocation("/c/path", "/another/path"),
file.NewVirtualLocation("/d/path", "/another/path"), file.NewVirtualLocation("/d/path", "/another/path"),
}, },
Type: NpmPkg, Type: pkg.NpmPkg,
} }
child.SetID() child.SetID()
@ -54,21 +55,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
} }
return []Package{parent, child}, []artifact.Relationship{relationship} return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
}, },
}, },
{ {
name: "owns-by-virtual-path", name: "owns-by-virtual-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := Package{ parent := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/some/other/path"), file.NewVirtualLocation("/a/path", "/some/other/path"),
file.NewVirtualLocation("/b/path", "/bee/path"), file.NewVirtualLocation("/b/path", "/bee/path"),
}, },
Type: RpmPkg, Type: pkg.RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: pkg.RpmdbMetadata{
Files: []RpmdbFileRecord{ Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"}, {Path: "/owning/path/1"},
{Path: "/owning/path/2"}, {Path: "/owning/path/2"},
{Path: "/another/path"}, {Path: "/another/path"},
@ -77,12 +78,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
} }
parent.SetID() parent.SetID()
child := Package{ child := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"), file.NewVirtualLocation("/c/path", "/another/path"),
file.NewLocation("/d/path"), file.NewLocation("/d/path"),
}, },
Type: NpmPkg, Type: pkg.NpmPkg,
} }
child.SetID() child.SetID()
@ -96,21 +97,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}, },
}, },
} }
return []Package{parent, child}, []artifact.Relationship{relationship} return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
}, },
}, },
{ {
name: "ignore-empty-path", name: "ignore-empty-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) { setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := Package{ parent := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/some/other/path"), file.NewVirtualLocation("/a/path", "/some/other/path"),
file.NewVirtualLocation("/b/path", "/bee/path"), file.NewVirtualLocation("/b/path", "/bee/path"),
}, },
Type: RpmPkg, Type: pkg.RpmPkg,
MetadataType: RpmdbMetadataType, MetadataType: pkg.RpmdbMetadataType,
Metadata: RpmdbMetadata{ Metadata: pkg.RpmdbMetadata{
Files: []RpmdbFileRecord{ Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"}, {Path: "/owning/path/1"},
{Path: "/owning/path/2"}, {Path: "/owning/path/2"},
{Path: ""}, {Path: ""},
@ -120,17 +121,17 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
parent.SetID() parent.SetID()
child := Package{ child := pkg.Package{
Locations: []file.Location{ Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"), file.NewVirtualLocation("/c/path", "/another/path"),
file.NewLocation("/d/path"), file.NewLocation("/d/path"),
}, },
Type: NpmPkg, Type: pkg.NpmPkg,
} }
child.SetID() child.SetID()
return []Package{parent, child}, nil return []pkg.Package{parent, child}, nil
}, },
}, },
} }
@ -138,8 +139,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
pkgs, expectedRelations := test.setup(t) pkgs, expectedRelations := test.setup(t)
c := NewCollection(pkgs...) c := pkg.NewCollection(pkgs...)
relationships := RelationshipsByFileOwnership(c) relationships := findOwnershipByFileOverlapRelationship(c)
assert.Len(t, relationships, len(expectedRelations)) assert.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations { for idx, expectedRelationship := range expectedRelations {
View File
@ -1,9 +1,5 @@
package packages package packages
import (
"github.com/anchore/syft/syft/cataloger/packages/java"
)
type SearchConfig struct { type SearchConfig struct {
IncludeIndexedArchives bool IncludeIndexedArchives bool
IncludeUnindexedArchives bool IncludeUnindexedArchives bool
@ -15,10 +11,3 @@ func DefaultSearchConfig() SearchConfig {
IncludeUnindexedArchives: false, IncludeUnindexedArchives: false,
} }
} }
func (c SearchConfig) Java() java.CatalogerConfig {
return java.CatalogerConfig{
SearchUnindexedArchives: c.IncludeUnindexedArchives,
SearchIndexedArchives: c.IncludeIndexedArchives,
}
}
View File
@ -2,13 +2,13 @@ package syft
import ( import (
"crypto" "crypto"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/version"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/filecontents" "github.com/anchore/syft/syft/cataloger/files/filecontents"
"github.com/anchore/syft/syft/cataloger/files/secrets" "github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/syft/cataloger/packages"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/version"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
@ -18,32 +18,32 @@ type CatalogingConfig struct {
ToolVersion string ToolVersion string
ToolConfiguration interface{} ToolConfiguration interface{}
// applies to all catalogers // applies to all catalogers
Scope source.Scope DefaultScope source.Scope // TODO: shouldn't this be in the package.SearchConfig?
ProcessTasksInSerial bool ProcessTasksInSerial bool // TODO: this seems a little odd; if this should be an option, is this the right spot?
EnabledCatalogers []cataloger.ID
availableTasks *taskCollection
// package // package
PackageCatalogers []pkg.Cataloger PackageSearch packages.SearchConfig
// file metadata // file metadata
CaptureFileMetadata bool DigestHashes []crypto.Hash
DigestHashes []crypto.Hash
// secrets // secrets
CaptureSecrets bool SecretsSearch secrets.Config
SecretsConfig secrets.CatalogerConfig SecretsScope source.Scope
SecretsScope source.Scope
// file classification // file classification
ClassifyFiles bool
FileClassifiers []fileclassifier.Classifier FileClassifiers []fileclassifier.Classifier
// file contents // file contents
ContentsConfig filecontents.CatalogerConfig ContentsSearch filecontents.Config
} }
func DefaultCatalogingConfig() CatalogingConfig { func DefaultCatalogingConfig() CatalogingConfig {
return CatalogingConfig{ return CatalogingConfig{
Scope: source.SquashedScope, DefaultScope: source.SquashedScope,
ToolName: internal.ApplicationName, ToolName: internal.ApplicationName,
ToolVersion: version.Guess(), ToolVersion: version.Guess(),
SecretsScope: source.AllLayersScope, SecretsScope: source.AllLayersScope,
SecretsConfig: secrets.DefaultCatalogerConfig(), SecretsSearch: secrets.DefaultConfig(),
FileClassifiers: fileclassifier.DefaultClassifiers(), FileClassifiers: fileclassifier.DefaultClassifiers(),
ContentsConfig: filecontents.DefaultCatalogerConfig(), ContentsSearch: filecontents.DefaultConfig(),
PackageSearch: packages.DefaultSearchConfig(),
} }
} }
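As a rough usage sketch (the field names are those declared above; imports for syft, source, and crypto are assumed), a caller could start from the defaults and adjust individual fields before cataloging:

cfg := syft.DefaultCatalogingConfig()
cfg.DefaultScope = source.AllLayersScope          // catalog every layer rather than the squashed filesystem
cfg.PackageSearch.IncludeUnindexedArchives = true // also inspect archives that lack an index
cfg.DigestHashes = []crypto.Hash{crypto.SHA256}   // compute SHA-256 digests during file cataloging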
View File
@ -2,9 +2,9 @@ package syft
import ( import (
"crypto" "crypto"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/secrets" "github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/syft/cataloger/packages"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
@ -27,7 +27,7 @@ func WithoutConcurrency() CatalogingOption {
func WithScope(scope source.Scope) CatalogingOption { func WithScope(scope source.Scope) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.Scope = scope config.DefaultScope = scope
return nil return nil
} }
} }
@ -47,47 +47,55 @@ func WithToolConfiguration(c interface{}) CatalogingOption {
} }
} }
func WithPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption { func WithCataloger(id cataloger.ID, c pkg.Cataloger) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = catalogers if config.availableTasks == nil {
return nil config.availableTasks = newTaskCollection()
}
var cfg CatalogingConfig
if config != nil {
cfg = *config
}
return config.availableTasks.add(pkgCatalogerTask{
id: id,
cataloger: c,
config: cfg,
})
} }
} }
func WithAdditionalPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption { func WithDefaultCatalogers() CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = append(config.PackageCatalogers, catalogers...)
return nil
}
}
func WithDefaultPackageCatalogers(cfg packages.SearchConfig) CatalogingOption {
return func(src *source.Source, config *CatalogingConfig) error { return func(src *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = packages.CatalogersBySourceScheme(src.Metadata.Scheme, cfg) // override any previously added catalogers
config.availableTasks = newTaskCollection()
config.EnabledCatalogers = nil
return nil return nil
} }
} }
func WithFileMetadata() CatalogingOption { func WithFileMetadata() CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.CaptureFileMetadata = true config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileMetadataID)
return nil return nil
} }
} }
func WithFileDigests(hashes ...crypto.Hash) CatalogingOption { func WithFileDigests(hashes ...crypto.Hash) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileDigestsID)
config.DigestHashes = hashes config.DigestHashes = hashes
return nil return nil
} }
} }
func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption { func WithSecrets(secretConfig *secrets.Config) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.CaptureSecrets = true config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.SecretsID)
if secretConfig != nil { if secretConfig != nil {
config.SecretsConfig = *secretConfig config.SecretsSearch = *secretConfig
} }
return nil return nil
} }
@ -95,30 +103,35 @@ func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption {
func WithFileClassification() CatalogingOption { func WithFileClassification() CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.ClassifyFiles = true if len(config.FileClassifiers) > 0 {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID)
}
return nil return nil
} }
} }
func WithFileClassifiers(classifiers ...fileclassifier.Classifier) CatalogingOption { func WithFileClassifiers(classifiers ...fileclassifier.Classifier) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.ClassifyFiles = !(len(classifiers) > 0)
config.FileClassifiers = classifiers config.FileClassifiers = classifiers
if len(config.FileClassifiers) > 0 {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID)
}
return nil return nil
} }
} }
func WithFileContents(globs ...string) CatalogingOption { func WithFileContents(globs ...string) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.ContentsConfig.Globs = globs config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileContentsID)
config.ContentsSearch.Globs = globs
return nil return nil
} }
} }
func WithFileSizeLimit(byteLimit int64) CatalogingOption { func WithFileSizeLimit(byteLimit int64) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error { return func(_ *source.Source, config *CatalogingConfig) error {
config.ContentsConfig.SkipFilesAboveSizeInBytes = byteLimit config.ContentsSearch.SkipFilesAboveSizeInBytes = byteLimit
config.SecretsConfig.MaxFileSize = byteLimit config.SecretsSearch.MaxFileSize = byteLimit
return nil return nil
} }
} }
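Taken together, the options above compose roughly as follows; this is a sketch that assumes src is a previously constructed *source.Source and that syft.Catalog applies the options in order:

s, err := syft.Catalog(src,
	syft.WithDefaultCatalogers(),             // start from the built-in task set
	syft.WithScope(source.SquashedScope),     // resolver scope used by most catalogers
	syft.WithFileDigests(crypto.SHA256),      // enables the file-digests cataloger and sets its hashes
	syft.WithFileContents("/etc/os-release"), // enables the file-contents cataloger for this glob
	syft.WithFileSizeLimit(1024*1024),        // shared cap for contents and secrets cataloging
)
if err != nil {
	// handle the error
}
_ = s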
View File
@ -10,8 +10,8 @@ const (
// AppUpdateAvailable is a partybus event that occurs when an application update is available // AppUpdateAvailable is a partybus event that occurs when an application update is available
AppUpdateAvailable partybus.EventType = "syft-app-update-available" AppUpdateAvailable partybus.EventType = "syft-app-update-available"
// PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun // CatalogingStarted is a partybus event that occurs when the first cataloger has started
PackageCatalogerStarted partybus.EventType = "syft-package-cataloger-started-event" CatalogingStarted partybus.EventType = "syft-cataloging-started-event"
// nolint:gosec // nolint:gosec
// SecretsCatalogerStarted is a partybus event that occurs when the secrets cataloging has begun // SecretsCatalogerStarted is a partybus event that occurs when the secrets cataloging has begun
View File
@ -4,8 +4,8 @@ import (
"github.com/wagoodman/go-progress" "github.com/wagoodman/go-progress"
) )
// PackageCatalogerMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). // CatalogingMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus).
type PackageCatalogerMonitor struct { type CatalogingMonitor struct {
FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers
PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers
} }
View File
@ -38,12 +38,12 @@ func checkEventType(actual, expected partybus.EventType) error {
return nil return nil
} }
func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.PackageCatalogerMonitor, error) { func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.CatalogingMonitor, error) {
if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil { if err := checkEventType(e.Type, event.CatalogingStarted); err != nil {
return nil, err return nil, err
} }
monitor, ok := e.Value.(monitor.PackageCatalogerMonitor) monitor, ok := e.Value.(monitor.CatalogingMonitor)
if !ok { if !ok {
return nil, newPayloadErr(e.Type, "Value", e.Value) return nil, newPayloadErr(e.Type, "Value", e.Value)
} }
View File
@ -11,7 +11,17 @@ import (
) )
// Collection represents a collection of Packages. // Collection represents a collection of Packages.
type Collection struct { type Collection interface {
Size() int
Package(id artifact.ID) *Package
PackagesByPath(path string) []Package
Packages(ids []artifact.ID) (result []Package)
Add(p Package)
Enumerate(types ...Type) <-chan Package
Sorted(types ...Type) (pkgs []Package)
}
type collection struct {
byID map[artifact.ID]Package byID map[artifact.ID]Package
idsByType map[Type][]artifact.ID idsByType map[Type][]artifact.ID
idsByPath map[string][]artifact.ID // note: this is real path or virtual path idsByPath map[string][]artifact.ID // note: this is real path or virtual path
@ -19,8 +29,8 @@ type Collection struct {
} }
// NewCollection returns a new empty Collection // NewCollection returns a new empty Collection
func NewCollection(pkgs ...Package) *Collection { func NewCollection(pkgs ...Package) Collection {
catalog := Collection{ catalog := &collection{
byID: make(map[artifact.ID]Package), byID: make(map[artifact.ID]Package),
idsByType: make(map[Type][]artifact.ID), idsByType: make(map[Type][]artifact.ID),
idsByPath: make(map[string][]artifact.ID), idsByPath: make(map[string][]artifact.ID),
@ -30,16 +40,16 @@ func NewCollection(pkgs ...Package) *Collection {
catalog.Add(p) catalog.Add(p)
} }
return &catalog return catalog
} }
// PackageCount returns the total number of packages that have been added. // Size returns the total number of packages that have been added.
func (c *Collection) PackageCount() int { func (c *collection) Size() int {
return len(c.byID) return len(c.byID)
} }
// Package returns the package with the given ID. // Package returns the package with the given ID.
func (c *Collection) Package(id artifact.ID) *Package { func (c *collection) Package(id artifact.ID) *Package {
v, exists := c.byID[id] v, exists := c.byID[id]
if !exists { if !exists {
return nil return nil
@ -54,12 +64,12 @@ func (c *Collection) Package(id artifact.ID) *Package {
} }
// PackagesByPath returns all packages that were discovered from the given path. // PackagesByPath returns all packages that were discovered from the given path.
func (c *Collection) PackagesByPath(path string) []Package { func (c *collection) PackagesByPath(path string) []Package {
return c.Packages(c.idsByPath[path]) return c.Packages(c.idsByPath[path])
} }
// Packages returns all packages for the given ID. // Packages returns all packages for the given ID.
func (c *Collection) Packages(ids []artifact.ID) (result []Package) { func (c *collection) Packages(ids []artifact.ID) (result []Package) {
for _, i := range ids { for _, i := range ids {
p, exists := c.byID[i] p, exists := c.byID[i]
if exists { if exists {
@ -70,7 +80,7 @@ func (c *Collection) Packages(ids []artifact.ID) (result []Package) {
} }
// Add a package to the Collection. // Add a package to the Collection.
func (c *Collection) Add(p Package) { func (c *collection) Add(p Package) {
c.lock.Lock() c.lock.Lock()
defer c.lock.Unlock() defer c.lock.Unlock()
@ -102,7 +112,7 @@ func (c *Collection) Add(p Package) {
} }
// Enumerate all packages for the given type(s), enumerating all packages if no type is specified. // Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
func (c *Collection) Enumerate(types ...Type) <-chan Package { func (c *collection) Enumerate(types ...Type) <-chan Package {
channel := make(chan Package) channel := make(chan Package)
go func() { go func() {
defer close(channel) defer close(channel)
@ -135,9 +145,8 @@ func (c *Collection) Enumerate(types ...Type) <-chan Package {
return channel return channel
} }
// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type // Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type is specified.
// is specified. func (c *collection) Sorted(types ...Type) (pkgs []Package) {
func (c *Collection) Sorted(types ...Type) (pkgs []Package) {
for p := range c.Enumerate(types...) { for p := range c.Enumerate(types...) {
pkgs = append(pkgs, p) pkgs = append(pkgs, p)
} }
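To make the interface change concrete, consuming code would interact with a Collection roughly like this (illustrative only; parent and child stand in for any pkg.Package values):

c := pkg.NewCollection(parent, child)
fmt.Println("packages:", c.Size())
for p := range c.Enumerate(pkg.RpmPkg) { // omit types to enumerate everything
	fmt.Println(p.Name, p.Version)
}
for _, p := range c.Sorted() { // name-sorted snapshot across all types
	_ = p
}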
View File
@ -1,8 +0,0 @@
package pkg
import "github.com/anchore/syft/syft/artifact"
// TODO: as more relationships are added, this function signature will probably accommodate selection
func NewRelationships(catalog *Collection) []artifact.Relationship {
return RelationshipsByFileOwnership(catalog)
}
View File
@ -16,7 +16,7 @@ type SBOM struct {
} }
type Artifacts struct { type Artifacts struct {
PackageCatalog *pkg.Collection Packages pkg.Collection
FileMetadata map[file.Coordinates]file.Metadata FileMetadata map[file.Coordinates]file.Metadata
FileDigests map[file.Coordinates][]file.Digest FileDigests map[file.Coordinates][]file.Digest
FileClassifications map[file.Coordinates][]file.Classification FileClassifications map[file.Coordinates][]file.Classification
View File
@ -0,0 +1,16 @@
package speculate
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/speculate/cpes"
)
func Identifiers(p *pkg.Package, release *linux.Release) {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpes.Generate(*p)
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(*p, release)
}
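The intended call pattern, mirrored by the package cataloging task later in this diff, is roughly the following sketch (p, release, and catalog are assumed to already exist in the caller):

// p was just produced by a cataloger; release came from linux.IdentifyRelease(resolver)
speculate.Identifiers(&p, release) // fills in p.CPEs and p.PURL in place
p.SetID()                          // CPEs and PURL are excluded from the ID, so this ordering is not load-bearing
catalog.Add(p)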
233
syft/task_collection.go Normal file
View File
@ -0,0 +1,233 @@
package syft
import (
"fmt"
"github.com/anchore/syft/syft/cataloger"
"github.com/scylladb/go-set/strset"
"sort"
"strings"
)
const (
packageTaskLabel = "package"
fileTaskLabel = "file"
osTaskLabel = "os"
languageTaskLabel = "language"
installedTaskLabel = "installed"
declaredTaskLabel = "declared"
)
type taskCollection struct {
taskByName map[string]task // name -> generator
namesByLabel map[string][]string // label -> names
}
func newTaskCollection() *taskCollection {
return &taskCollection{
taskByName: make(map[string]task),
namesByLabel: make(map[string][]string),
}
}
func (c *taskCollection) add(t task, labels ...string) error {
var name string
switch v := t.(type) {
case pkgCatalogerTask:
name = string(v.id)
case catalogerTask:
name = string(v.id)
default:
if len(labels) == 0 {
return fmt.Errorf("no ID found for generic task")
}
name = labels[0]
}
if _, exists := c.taskByName[name]; exists {
return fmt.Errorf("task already exists: %q", name)
}
c.taskByName[name] = t
labelSet := strset.New(labels...)
labelSet.Add(name)
for _, n := range labelSet.List() {
c.namesByLabel[n] = append(c.namesByLabel[n], name)
}
return nil
}
func (c *taskCollection) addAllCatalogers(config CatalogingConfig) error {
for _, d := range []struct {
generator taskGenerator
labels []string
}{
{
generator: newAPKDBCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "alpine", "apk", "apkdb"},
},
{
generator: newDPKGCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "debian", "dpkg", "deb", "dpkgdb"},
},
{
generator: newRPMDBCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "redhat", "rhel", "centos", "rpm", "rpmdb"},
},
{
generator: newRubyGemSpecCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "ruby", "gemspec", "gem"},
},
{
generator: newRubyGemFileLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "ruby", "gemfile", "gem", "gemfile.lock"},
},
{
generator: newPythonPackageCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "python", "egg", "wheel"},
},
{
generator: newPythonRequirementsCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "requirements", "requirements.txt"},
},
{
generator: newPythonPoetryCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "poetry", "poetry.lock"},
},
{
generator: newPythonSetupCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "setup", "setup.py"},
},
{
generator: newPythonPipfileCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "pip", "pipfile"},
},
{
generator: newJavascriptPackageJSONCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "javascript", "node", "package.json"},
},
{
generator: newJavascriptPackageLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "package-lock.json"},
},
{
generator: newJavascriptYarnLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "yarn", "yarn.lock"},
},
{
generator: newJavaCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "java", "maven", "jar", "war", "ear", "jenkins", "hudson", "hpi", "jpi", "par", "sar", "lpkg"},
},
{
generator: newGolangModuleCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "go", "golang", "go-module", "go.mod"},
},
{
generator: newGolangBinaryCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "go", "golang", "go-module", "binary"},
},
{
generator: newRustCargoLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "rust", "cargo", "cargo.lock"},
},
{
generator: newPHPInstalledCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "php", "composer", "installed.json"},
},
{
generator: newPHPComposerLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "php", "composer", "composer.lock"},
},
{
generator: newFileMetadataCatalogingTask,
labels: []string{fileTaskLabel},
},
{
generator: newFileDigestsCatalogingTask,
labels: []string{fileTaskLabel, "digests", "digest", "file-digests"},
},
{
generator: newSecretsCatalogingTask,
labels: []string{"secrets"},
},
{
generator: newFileClassifierTask,
labels: []string{fileTaskLabel, "classifier"},
},
{
generator: newFileContentsCatalogingTask,
labels: []string{fileTaskLabel, "contents", "content", "file-contents"},
},
} {
t, err := d.generator(config)
if err != nil {
return err
}
if t == nil {
continue
}
if err := c.add(t, d.labels...); err != nil {
return err
}
}
return nil
}
func (c taskCollection) query(q string) []cataloger.ID {
fields := strings.FieldsFunc(q, func(r rune) bool {
switch r {
case '+', ',', '&':
return true
}
return false
})
return c.withLabels(fields...)
}
func (c taskCollection) all() []cataloger.ID {
var ret []cataloger.ID
for k := range c.taskByName {
ret = append(ret, cataloger.ID(k))
}
sort.Sort(cataloger.IDs(ret))
return ret
}
func (c taskCollection) withLabels(q ...string) []cataloger.ID {
req := strset.New()
for i, f := range q {
switch i {
case 0:
req.Add(c.namesByLabel[f]...)
continue
default:
req = strset.Intersection(req, strset.New(c.namesByLabel[f]...))
}
}
var ret []cataloger.ID
for _, i := range req.List() {
ret = append(ret, cataloger.ID(i))
}
// ensure stable results
sort.Sort(cataloger.IDs(ret))
return ret
}
func (c taskCollection) tasks(ids ...cataloger.ID) (ts []task) {
for _, id := range ids {
t, exists := c.taskByName[string(id)]
if !exists {
continue
}
ts = append(ts, t)
}
return ts
}
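Within the syft package, selecting tasks by label could then look roughly like this sketch (the label strings are the ones registered in addAllCatalogers above):

c := newTaskCollection()
if err := c.addAllCatalogers(DefaultCatalogingConfig()); err != nil {
	// handle the error
}
installed := c.withLabels("package", "installed") // IDs carrying both labels (set intersection)
same := c.query("package+installed")              // '+', ',' and '&' all act as label separators
tasks := c.tasks(installed...)
_, _ = same, tasks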
View File
@ -2,11 +2,24 @@ package syft
import ( import (
"fmt" "fmt"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/filecontents" "github.com/anchore/syft/syft/cataloger/files/filecontents"
"github.com/anchore/syft/syft/cataloger/files/filedigests" "github.com/anchore/syft/syft/cataloger/files/filedigests"
"github.com/anchore/syft/syft/cataloger/files/filemetadata" "github.com/anchore/syft/syft/cataloger/files/filemetadata"
"github.com/anchore/syft/syft/cataloger/files/secrets" "github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/syft/cataloger/packages/apkdb"
"github.com/anchore/syft/syft/cataloger/packages/deb"
"github.com/anchore/syft/syft/cataloger/packages/golang"
"github.com/anchore/syft/syft/cataloger/packages/java"
"github.com/anchore/syft/syft/cataloger/packages/javascript"
"github.com/anchore/syft/syft/cataloger/packages/php"
"github.com/anchore/syft/syft/cataloger/packages/python"
"github.com/anchore/syft/syft/cataloger/packages/rpmdb"
"github.com/anchore/syft/syft/cataloger/packages/ruby"
"github.com/anchore/syft/syft/cataloger/packages/rust"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/speculate"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloger/packages" "github.com/anchore/syft/syft/cataloger/packages"
@ -15,152 +28,370 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
type task func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
type taskGenerator func(CatalogingConfig) (task, error) type taskGenerator func(CatalogingConfig) (task, error)
func generatePackagesCatalogingTask(config CatalogingConfig) (task, error) { type task interface {
if len(config.PackageCatalogers) == 0 { Run(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
return nil, nil }
type genericTask struct {
run func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
}
func (t genericTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
return t.run(artifacts, src)
}
type catalogerTask struct {
id cataloger.ID
genericTask
}
type pkgCatalogerTask struct {
id cataloger.ID
cataloger pkg.Cataloger
config CatalogingConfig
}
func (t pkgCatalogerTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(t.config.DefaultScope)
if err != nil {
return nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err)
} }
return func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { // catalog packages
resolver, err := src.FileResolver(config.Scope) pkgs, relationships, err := t.cataloger.Catalog(resolver)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) return nil, err
} }
// find the distro for _, p := range pkgs {
artifacts.LinuxDistribution = linux.IdentifyRelease(resolver) p.FoundBy = string(t.id)
speculate.Identifiers(&p, artifacts.LinuxDistribution)
p.SetID()
artifacts.Packages.Add(p)
}
// catalog packages return relationships, nil
catalog, relationships, err := packages.Catalog(resolver, artifacts.LinuxDistribution, config.PackageCatalogers...) }
if err != nil {
return nil, err
}
artifacts.PackageCatalog = catalog
return relationships, nil func newIdentifyDistroTask(config CatalogingConfig) (task, error) {
return genericTask{
run: func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, fmt.Errorf("unable to determine resolver while determining linux distro: %w", err)
}
artifacts.LinuxDistribution = linux.IdentifyRelease(resolver)
return nil, nil
},
}, nil }, nil
} }
func generateFileMetadataCatalogingTask(config CatalogingConfig) (task, error) { func newAPKDBCatalogingTask(config CatalogingConfig) (task, error) {
if !config.CaptureFileMetadata { return pkgCatalogerTask{
return nil, nil id: cataloger.ApkDBID,
} cataloger: apkdb.NewApkdbCataloger(),
config: config,
cataloger := filemetadata.NewCataloger()
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileMetadata = result
return nil, nil
}, nil }, nil
} }
func generateFileDigestsCatalogingTask(config CatalogingConfig) (task, error) { func newDPKGCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.DpkgID,
cataloger: deb.NewDpkgdbCataloger(),
config: config,
}, nil
}
func newGolangBinaryCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.GoBinaryID,
cataloger: golang.NewGoModuleBinaryCataloger(),
config: config,
}, nil
}
func newGolangModuleCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.GoModID,
cataloger: golang.NewGoModFileCataloger(),
config: config,
}, nil
}
func newJavaCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.JavaArchiveID,
cataloger: java.NewJavaCataloger(java.CatalogerConfig{
SearchUnindexedArchives: config.PackageSearch.IncludeUnindexedArchives,
SearchIndexedArchives: config.PackageSearch.IncludeIndexedArchives,
}),
config: config,
}, nil
}
func newJavascriptPackageJSONCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.JavascriptPackageJSONID,
cataloger: javascript.NewJavascriptPackageCataloger(),
config: config,
}, nil
}
func newJavascriptPackageLockCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.JavascriptPackageLockID,
cataloger: javascript.NewJavascriptPackageLockCataloger(),
config: config,
}, nil
}
func newJavascriptYarnLockCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.JavaScriptYarnLockID,
cataloger: javascript.NewJavascriptYarnLockCataloger(),
config: config,
}, nil
}
func newPHPComposerLockCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PHPComposerLockID,
cataloger: php.NewPHPComposerLockCataloger(),
config: config,
}, nil
}
func newPHPInstalledCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PHPInstalledJSONID,
cataloger: php.NewPHPComposerInstalledCataloger(),
config: config,
}, nil
}
func newPythonPackageCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PythonPackageID,
cataloger: python.NewPythonPackageCataloger(),
config: config,
}, nil
}
func newPythonRequirementsCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PythonRequirementsID,
cataloger: python.NewPythonRequirementsCataloger(),
config: config,
}, nil
}
func newPythonPoetryCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PythonPoetryID,
cataloger: python.NewPythonPoetryCataloger(),
config: config,
}, nil
}
func newPythonPipfileCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PythonPipFileID,
cataloger: python.NewPythonPipfileCataloger(),
config: config,
}, nil
}
func newPythonSetupCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.PythonSetupID,
cataloger: python.NewPythonSetupCataloger(),
config: config,
}, nil
}
func newRPMDBCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.RpmDBID,
cataloger: rpmdb.NewRpmdbCataloger(),
config: config,
}, nil
}
func newRubyGemFileLockCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.RubyGemfileLockID,
cataloger: ruby.NewGemFileLockCataloger(),
config: config,
}, nil
}
func newRubyGemSpecCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.RubyGemspecID,
cataloger: ruby.NewGemSpecCataloger(),
config: config,
}, nil
}
func newRustCargoLockCatalogingTask(config CatalogingConfig) (task, error) {
return pkgCatalogerTask{
id: cataloger.RustCargoLockID,
cataloger: rust.NewCargoLockCataloger(),
config: config,
}, nil
}
func newFileMetadataCatalogingTask(config CatalogingConfig) (task, error) {
c := filemetadata.NewCataloger()
return catalogerTask{
id: cataloger.FileMetadataID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileMetadata = result
return nil, nil
},
},
}, nil
}
func newFileDigestsCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.DigestHashes) == 0 { if len(config.DigestHashes) == 0 {
return nil, nil return nil, nil
} }
cataloger, err := filedigests.NewCataloger(config.DigestHashes) c, err := filedigests.NewCataloger(config.DigestHashes)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { return catalogerTask{
resolver, err := src.FileResolver(config.Scope) id: cataloger.FileDigestsID,
if err != nil { genericTask: genericTask{
return nil, err run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
} resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver) result, err := c.Catalog(resolver)
if err != nil { if err != nil {
return nil, err return nil, err
} }
results.FileDigests = result results.FileDigests = result
return nil, nil return nil, nil
},
},
}, nil }, nil
} }
func generateContentsCatalogingTask(config CatalogingConfig) (task, error) { func newFileContentsCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.ContentsConfig.Globs) == 0 { if len(config.ContentsSearch.Globs) == 0 {
return nil, nil return nil, nil
} }
cataloger, err := filecontents.NewCataloger(config.ContentsConfig) c, err := filecontents.NewCataloger(config.ContentsSearch)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { return catalogerTask{
resolver, err := src.FileResolver(config.Scope) id: cataloger.FileContentsID,
if err != nil { genericTask: genericTask{
return nil, err run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
} resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver) result, err := c.Catalog(resolver)
if err != nil { if err != nil {
return nil, err return nil, err
} }
results.FileContents = result results.FileContents = result
return nil, nil return nil, nil
},
},
}, nil }, nil
} }
func generateSecretsCatalogingTask(config CatalogingConfig) (task, error) { func newSecretsCatalogingTask(config CatalogingConfig) (task, error) {
if !config.CaptureSecrets {
return nil, nil
}
cataloger, err := secrets.NewCataloger(config.SecretsConfig) c, err := secrets.NewCataloger(config.SecretsSearch)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { return catalogerTask{
resolver, err := src.FileResolver(config.SecretsScope) id: cataloger.SecretsID,
if err != nil { genericTask: genericTask{
return nil, err run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
} resolver, err := src.FileResolver(config.SecretsScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver) result, err := c.Catalog(resolver)
if err != nil { if err != nil {
return nil, err return nil, err
} }
results.Secrets = result results.Secrets = result
return nil, nil return nil, nil
},
},
}, nil }, nil
} }
func generateFileClassifierTask(config CatalogingConfig) (task, error) { func newFileClassifierTask(config CatalogingConfig) (task, error) {
if !config.ClassifyFiles {
return nil, nil
}
cataloger, err := fileclassifier.NewCataloger(config.FileClassifiers) c, err := fileclassifier.NewCataloger(config.FileClassifiers)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { return catalogerTask{
resolver, err := src.FileResolver(config.Scope) id: cataloger.FileClassifierID,
if err != nil { genericTask: genericTask{
return nil, err run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
} resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver) result, err := c.Catalog(resolver)
if err != nil { if err != nil {
return nil, err return nil, err
} }
results.FileClassifications = result results.FileClassifications = result
return nil, nil return nil, nil
},
},
}, nil
}
func newSynthesizePackageRelationshipsTasks(config CatalogingConfig) (task, error) {
return genericTask{
run: func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
return packages.FindRelationships(artifacts.Packages, resolver), nil
},
}, nil }, nil
} }
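For illustration only, a one-off task could be assembled from the same building blocks; the ID string below is hypothetical and not part of this change:

check := catalogerTask{
	id: cataloger.ID("package-count-check"), // hypothetical ID, used only for this sketch
	genericTask: genericTask{
		run: func(results *sbom.Artifacts, _ *source.Source) ([]artifact.Relationship, error) {
			if results.Packages == nil || results.Packages.Size() == 0 {
				return nil, fmt.Errorf("no packages were cataloged")
			}
			return nil, nil
		},
	},
}
_ = check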
View File
@ -20,7 +20,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
imagetest.GetFixtureImage(b, "docker-archive", fixtureImageName) imagetest.GetFixtureImage(b, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName)
var pc *pkg.Collection var pc pkg.Collection
for _, c := range packages.InstalledCatalogers(packages.DefaultSearchConfig()) { for _, c := range packages.InstalledCatalogers(packages.DefaultSearchConfig()) {
// in case of future alteration where state is persisted, assume no dependency is safe to reuse // in case of future alteration where state is persisted, assume no dependency is safe to reuse
userInput := "docker-archive:" + tarPath userInput := "docker-archive:" + tarPath
@ -48,7 +48,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
} }
}) })
b.Logf("catalog for %q number of packages: %d", c.Name(), pc.PackageCount()) b.Logf("catalog for %q number of packages: %d", c.Name(), pc.Size())
} }
} }
@ -84,7 +84,7 @@ func TestPkgCoverageImage(t *testing.T) {
t.Run(c.name, func(t *testing.T) { t.Run(c.name, func(t *testing.T) {
pkgCount := 0 pkgCount := 0
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) { for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) {
if a.Language.String() != "" { if a.Language.String() != "" {
observedLanguages.Add(a.Language.String()) observedLanguages.Add(a.Language.String())
@ -112,7 +112,7 @@ func TestPkgCoverageImage(t *testing.T) {
if pkgCount != len(c.pkgInfo)+c.duplicates { if pkgCount != len(c.pkgInfo)+c.duplicates {
t.Logf("Discovered packages of type %+v", c.pkgType) t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) { for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) {
t.Log(" ", a) t.Log(" ", a)
} }
t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo)) t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo))
@ -161,7 +161,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
actualPkgCount := 0 actualPkgCount := 0
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) { for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) {
observedLanguages.Add(actualPkg.Language.String()) observedLanguages.Add(actualPkg.Language.String())
observedPkgs.Add(string(actualPkg.Type)) observedPkgs.Add(string(actualPkg.Type))
@ -186,7 +186,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
} }
if actualPkgCount != len(test.pkgInfo)+test.duplicates { if actualPkgCount != len(test.pkgInfo)+test.duplicates {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) { for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) {
t.Log(" ", actualPkg) t.Log(" ", actualPkg)
} }
t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo)) t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo))
View File
@ -13,7 +13,7 @@ func TestNpmPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation) t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation)
@ -34,7 +34,7 @@ func TestYarnPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet() foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations { for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") { if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation) t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation)
View File
@ -13,7 +13,7 @@ func TestRegression212ApkBufferSize(t *testing.T) {
expectedPkgs := 58 expectedPkgs := 58
actualPkgs := 0 actualPkgs := 0
for range sbom.Artifacts.PackageCatalog.Enumerate(pkg.ApkPkg) { for range sbom.Artifacts.Packages.Enumerate(pkg.ApkPkg) {
actualPkgs += 1 actualPkgs += 1
} }
View File
@ -19,7 +19,7 @@ func TestRegressionGoArchDiscovery(t *testing.T) {
var actualELF, actualWIN, actualMACOS int var actualELF, actualWIN, actualMACOS int
for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) { for p := range sbom.Artifacts.Packages.Enumerate(pkg.GoModulePkg) {
for _, l := range p.Locations { for _, l := range p.Locations {
switch { switch {
case strings.Contains(l.RealPath, "elf"): case strings.Contains(l.RealPath, "elf"):
View File
@ -33,7 +33,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou
return sbom.SBOM{ return sbom.SBOM{
Artifacts: sbom.Artifacts{ Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog, Packages: pkgCatalog,
LinuxDistribution: release, LinuxDistribution: release,
}, },
Relationships: relationships, Relationships: relationships,
@ -69,7 +69,7 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) {
return sbom.SBOM{ return sbom.SBOM{
Artifacts: sbom.Artifacts{ Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog, Packages: pkgCatalog,
LinuxDistribution: release, LinuxDistribution: release,
}, },
Relationships: relationships, Relationships: relationships,
View File
@ -15,7 +15,7 @@ import (
"github.com/wagoodman/jotframe/pkg/frame" "github.com/wagoodman/jotframe/pkg/frame"
) )
// Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, PackageCatalogerStarted) // Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, CatalogingStarted)
type Handler struct { type Handler struct {
} }
@ -27,7 +27,7 @@ func NewHandler() *Handler {
// RespondsTo indicates if the handler is capable of handling the given event. // RespondsTo indicates if the handler is capable of handling the given event.
func (r *Handler) RespondsTo(event partybus.Event) bool { func (r *Handler) RespondsTo(event partybus.Event) bool {
switch event.Type { switch event.Type {
case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.PackageCatalogerStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted: case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.CatalogingStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted:
return true return true
default: default:
return false return false
@ -46,7 +46,7 @@ func (r *Handler) Handle(ctx context.Context, fr *frame.Frame, event partybus.Ev
case stereoscopeEvent.FetchImage: case stereoscopeEvent.FetchImage:
return FetchImageHandler(ctx, fr, event, wg) return FetchImageHandler(ctx, fr, event, wg)
case syftEvent.PackageCatalogerStarted: case syftEvent.CatalogingStarted:
return PackageCatalogerStartedHandler(ctx, fr, event, wg) return PackageCatalogerStartedHandler(ctx, fr, event, wg)
case syftEvent.SecretsCatalogerStarted: case syftEvent.SecretsCatalogerStarted: