diff --git a/cmd/attest.go b/cmd/attest.go index d4c7a33e1..7711c8c90 100644 --- a/cmd/attest.go +++ b/cmd/attest.go @@ -193,6 +193,12 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st go func() { defer close(errs) + catalogingConfig, err := appConfig.ToCatalogingConfig() + if err != nil { + errs <- err + return + } + src, cleanup, err := source.NewFromRegistry(si, appConfig.Registry.ToOptions(), appConfig.Exclusions) if cleanup != nil { defer cleanup() @@ -202,7 +208,7 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st return } - s, err := generateSBOM(src) + s, err := generateSBOM(src, catalogingConfig) if err != nil { errs <- err return diff --git a/cmd/packages.go b/cmd/packages.go index c99d0b99e..bb5485474 100644 --- a/cmd/packages.go +++ b/cmd/packages.go @@ -94,7 +94,7 @@ func init() { func setPackageFlags(flags *pflag.FlagSet) { // Formatting & Input options ////////////////////////////////////////////// flags.StringP( - "scope", "s", syft.DefaultCatalogingConfig().Scope.String(), + "scope", "s", syft.DefaultCatalogingConfig().DefaultScope.String(), fmt.Sprintf("selection of layers to catalog, options=%v", source.AllScopes)) flags.StringArrayP( @@ -165,7 +165,7 @@ func bindExclusivePackagesConfigOptions(flags *pflag.FlagSet) error { // note: output is not included since this configuration option is shared between multiple subcommands - if err := viper.BindPFlag("package.cataloger.scope", flags.Lookup("scope")); err != nil { + if err := viper.BindPFlag("scope", flags.Lookup("scope")); err != nil { return err } @@ -257,15 +257,9 @@ func isVerbose() (result bool) { return appConfig.CliOptions.Verbosity > 0 || isPipedInput } -func generateSBOM(src *source.Source) (*sbom.SBOM, error) { - catalogingConfig, err := appConfig.ToCatalogingConfig() - if err != nil { - return nil, err - } - +func generateSBOM(src *source.Source, config *syft.CatalogingConfig) (*sbom.SBOM, error) { return syft.Catalog(src, - syft.WithConfig(*catalogingConfig), - syft.WithDefaultPackageCatalogers(appConfig.Package.ToConfig()), + syft.WithConfig(*config), ) } @@ -274,6 +268,12 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error { go func() { defer close(errs) + catalogingConfig, err := appConfig.ToCatalogingConfig() + if err != nil { + errs <- err + return + } + src, cleanup, err := source.New(si, appConfig.Registry.ToOptions(), appConfig.Exclusions) if cleanup != nil { defer cleanup() @@ -283,7 +283,7 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error { return } - s, err := generateSBOM(src) + s, err := generateSBOM(src, catalogingConfig) if err != nil { errs <- err return diff --git a/cmd/power_user.go b/cmd/power_user.go index b900c41e7..a4a0610f3 100644 --- a/cmd/power_user.go +++ b/cmd/power_user.go @@ -104,10 +104,17 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error { go func() { defer close(errs) - appConfig.Secrets.Cataloger.Enabled = true - appConfig.FileMetadata.Cataloger.Enabled = true - appConfig.FileContents.Cataloger.Enabled = true - appConfig.FileClassification.Cataloger.Enabled = true + // TODO: replace + //appConfig.Secrets.Cataloger.Enabled = true + //appConfig.FileMetadata.Cataloger.Enabled = true + //appConfig.FileContents.Cataloger.Enabled = true + //appConfig.FileClassification.Cataloger.Enabled = true + + catalogingConfig, err := appConfig.ToCatalogingConfig() + if err != nil { + errs <- err + return + } si, err := source.ParseInput(userInput, appConfig.Platform, true) if err != nil { @@ -124,7 +131,7 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error { defer cleanup() } - s, err := generateSBOM(src) + s, err := generateSBOM(src, catalogingConfig) if err != nil { errs <- err return diff --git a/internal/anchore/import_package_sbom_test.go b/internal/anchore/import_package_sbom_test.go index f2f321844..3310d7e5f 100644 --- a/internal/anchore/import_package_sbom_test.go +++ b/internal/anchore/import_package_sbom_test.go @@ -54,7 +54,7 @@ func (m *mockPackageSBOMImportAPI) ImportImagePackages(ctx context.Context, sess func sbomFixture() sbom.SBOM { return sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: pkg.NewCollection(pkg.Package{ + Packages: pkg.NewCollection(pkg.Package{ Name: "name", Version: "version", FoundBy: "foundBy", diff --git a/internal/config/application.go b/internal/config/application.go index c0e4b1701..ebe424a29 100644 --- a/internal/config/application.go +++ b/internal/config/application.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "github.com/anchore/syft/syft/cataloger/files/fileclassifier" + "github.com/anchore/syft/syft/source" "path" "reflect" "strings" @@ -21,6 +22,7 @@ import ( var ErrApplicationConfigNotFound = fmt.Errorf("application config not found") +// TODO: set all catalogers when this is set var catalogerEnabledDefault = false type defaultValueLoader interface { @@ -33,24 +35,24 @@ type parser interface { // Application is the main syft application configuration. type Application struct { - ConfigPath string `yaml:",omitempty" json:"configPath"` // the location where the application config was read from (either from -c or discovered while loading) - Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output - File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to - Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI) - CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not - Anchore anchore `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise - CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config) - Dev development `yaml:"dev" json:"dev" mapstructure:"dev"` - Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options - Package pkg `yaml:"package" json:"package" mapstructure:"package"` - FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` - FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"` - FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` - Secrets secretsCfg `yaml:"secrets" json:"secrets" mapstructure:"secrets"` - Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"` - Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` - Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"` - Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` + ConfigPath string `yaml:",omitempty" json:"configPath"` // the location where the application config was read from (either from -c or discovered while loading) + Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output + File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to + Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"` // -q, indicates to not show any status output to stderr (ETUI or logging UI) + CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not + Anchore anchore `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise + CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config) + Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` + Dev development `yaml:"dev" json:"dev" mapstructure:"dev"` + Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options + Package pkg `yaml:"package" json:"package" mapstructure:"package"` + FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"` + FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"` + Secrets secretsCfg `yaml:"secrets" json:"secrets" mapstructure:"secrets"` + Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"` + Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"` + Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"` + Platform string `yaml:"platform" json:"platform" mapstructure:"platform"` } func (cfg Application) ToCatalogingConfig() (*syft.CatalogingConfig, error) { @@ -59,26 +61,33 @@ func (cfg Application) ToCatalogingConfig() (*syft.CatalogingConfig, error) { return nil, fmt.Errorf("unable to parse config item 'file-metadata.digests': %w", err) } + scopeOption := source.ParseScope(cfg.Scope) + if scopeOption == source.UnknownScope { + return nil, fmt.Errorf("bad scope value %q", cfg.Scope) + } + secretsConfig, err := cfg.Secrets.ToConfig() if err != nil { return nil, err } + secretsScopeOption := source.ParseScope(cfg.Secrets.Scope) + if secretsScopeOption == source.UnknownScope { + return nil, fmt.Errorf("bad scope value %q", cfg.Secrets.Scope) + } + return &syft.CatalogingConfig{ // note: package catalogers cannot be determined until runtime ToolName: internal.ApplicationName, ToolVersion: version.FromBuild().Version, ToolConfiguration: cfg, - Scope: cfg.Package.Cataloger.ScopeOpt, + DefaultScope: scopeOption, ProcessTasksInSerial: false, - CaptureFileMetadata: cfg.FileMetadata.Cataloger.Enabled, DigestHashes: digests, - CaptureSecrets: cfg.Secrets.Cataloger.Enabled, - SecretsConfig: *secretsConfig, - SecretsScope: cfg.Secrets.Cataloger.ScopeOpt, - ClassifyFiles: cfg.FileClassification.Cataloger.Enabled, + SecretsSearch: *secretsConfig, + SecretsScope: secretsScopeOption, FileClassifiers: fileclassifier.DefaultClassifiers(), - ContentsConfig: cfg.FileContents.ToConfig(), + ContentsSearch: cfg.FileContents.ToConfig(), }, nil } diff --git a/internal/config/cataloger_options.go b/internal/config/cataloger_options.go deleted file mode 100644 index 1fb2992b4..000000000 --- a/internal/config/cataloger_options.go +++ /dev/null @@ -1,29 +0,0 @@ -package config - -import ( - "fmt" - - "github.com/spf13/viper" - - "github.com/anchore/syft/syft/source" -) - -type catalogerOptions struct { - Enabled bool `yaml:"enabled" json:"enabled" mapstructure:"enabled"` - Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` - ScopeOpt source.Scope `yaml:"-" json:"-"` -} - -func (cfg catalogerOptions) loadDefaultValues(v *viper.Viper) { - v.SetDefault("package.cataloger.enabled", true) -} - -func (cfg *catalogerOptions) parseConfigValues() error { - scopeOption := source.ParseScope(cfg.Scope) - if scopeOption == source.UnknownScope { - return fmt.Errorf("bad scope value %q", cfg.Scope) - } - cfg.ScopeOpt = scopeOption - - return nil -} diff --git a/internal/config/file_classification.go b/internal/config/file_classification.go deleted file mode 100644 index f4ba63018..000000000 --- a/internal/config/file_classification.go +++ /dev/null @@ -1,19 +0,0 @@ -package config - -import ( - "github.com/anchore/syft/syft/source" - "github.com/spf13/viper" -) - -type fileClassification struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` -} - -func (cfg fileClassification) loadDefaultValues(v *viper.Viper) { - v.SetDefault("file-classification.cataloger.enabled", catalogerEnabledDefault) - v.SetDefault("file-classification.cataloger.scope", source.SquashedScope) -} - -func (cfg *fileClassification) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() -} diff --git a/internal/config/file_contents.go b/internal/config/file_contents.go index 55e66ade0..c31031591 100644 --- a/internal/config/file_contents.go +++ b/internal/config/file_contents.go @@ -3,29 +3,21 @@ package config import ( "github.com/anchore/syft/syft/cataloger/files/filecontents" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/source" "github.com/spf13/viper" ) type fileContents struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` - Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` + SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` + Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"` } func (cfg fileContents) loadDefaultValues(v *viper.Viper) { - v.SetDefault("file-contents.cataloger.enabled", catalogerEnabledDefault) - v.SetDefault("file-contents.cataloger.scope", source.SquashedScope) v.SetDefault("file-contents.skip-files-above-size", 1*file.MB) v.SetDefault("file-contents.globs", []string{}) } -func (cfg *fileContents) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() -} - -func (cfg fileContents) ToConfig() filecontents.CatalogerConfig { - return filecontents.CatalogerConfig{ +func (cfg fileContents) ToConfig() filecontents.Config { + return filecontents.Config{ Globs: cfg.Globs, SkipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize, } diff --git a/internal/config/file_metadata.go b/internal/config/file_metadata.go index 764b9b392..fd1938722 100644 --- a/internal/config/file_metadata.go +++ b/internal/config/file_metadata.go @@ -1,21 +1,17 @@ package config import ( - "github.com/anchore/syft/syft/source" "github.com/spf13/viper" ) -type FileMetadata struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"` +type fileMetadata struct { + Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"` } -func (cfg FileMetadata) loadDefaultValues(v *viper.Viper) { - v.SetDefault("file-metadata.cataloger.enabled", catalogerEnabledDefault) - v.SetDefault("file-metadata.cataloger.scope", source.SquashedScope) +func (cfg fileMetadata) loadDefaultValues(v *viper.Viper) { v.SetDefault("file-metadata.digests", []string{"sha256"}) } -func (cfg *FileMetadata) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() +func (cfg *fileMetadata) parseConfigValues() error { + return nil } diff --git a/internal/config/pkg.go b/internal/config/pkg.go index d4e0b98d1..75355b104 100644 --- a/internal/config/pkg.go +++ b/internal/config/pkg.go @@ -6,22 +6,16 @@ import ( ) type pkg struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` - SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"` - SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"` + SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"` + SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"` } func (cfg pkg) loadDefaultValues(v *viper.Viper) { - cfg.Cataloger.loadDefaultValues(v) c := packages.DefaultSearchConfig() v.SetDefault("package.search-unindexed-archives", c.IncludeUnindexedArchives) v.SetDefault("package.search-indexed-archives", c.IncludeIndexedArchives) } -func (cfg *pkg) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() -} - func (cfg pkg) ToConfig() packages.SearchConfig { return packages.SearchConfig{ IncludeIndexedArchives: cfg.SearchIndexedArchives, diff --git a/internal/config/secrets.go b/internal/config/secrets.go index 3c916b5e0..f2c8115ad 100644 --- a/internal/config/secrets.go +++ b/internal/config/secrets.go @@ -10,32 +10,27 @@ import ( ) type secretsCfg struct { - Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"` AdditionalPatterns map[string]string `yaml:"additional-patterns" json:"additional-patterns" mapstructure:"additional-patterns"` ExcludePatternNames []string `yaml:"exclude-pattern-names" json:"exclude-pattern-names" mapstructure:"exclude-pattern-names"` RevealValues bool `yaml:"reveal-values" json:"reveal-values" mapstructure:"reveal-values"` SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"` + Scope string `yaml:"scope" json:"scope" mapstructure:"scope"` } func (cfg secretsCfg) loadDefaultValues(v *viper.Viper) { - v.SetDefault("secrets.cataloger.enabled", catalogerEnabledDefault) - v.SetDefault("secrets.cataloger.scope", source.AllLayersScope) + v.SetDefault("secrets.scope", source.AllLayersScope) v.SetDefault("secrets.reveal-values", false) v.SetDefault("secrets.skip-files-above-size", 1*file.MB) v.SetDefault("secrets.additional-patterns", map[string]string{}) v.SetDefault("secrets.exclude-pattern-names", []string{}) } -func (cfg *secretsCfg) parseConfigValues() error { - return cfg.Cataloger.parseConfigValues() -} - -func (cfg secretsCfg) ToConfig() (*secrets.CatalogerConfig, error) { +func (cfg secretsCfg) ToConfig() (*secrets.Config, error) { patterns, err := file.GenerateSearchPatterns(secrets.DefaultSecretsPatterns, cfg.AdditionalPatterns, cfg.ExcludePatternNames) if err != nil { return nil, fmt.Errorf("unable to process secrets config patterns: %w", err) } - return &secrets.CatalogerConfig{ + return &secrets.Config{ Patterns: patterns, RevealValues: cfg.RevealValues, MaxFileSize: cfg.SkipFilesAboveSize, diff --git a/internal/formats/common/cyclonedxhelpers/decoder.go b/internal/formats/common/cyclonedxhelpers/decoder.go index 264c96b73..bfef8a5af 100644 --- a/internal/formats/common/cyclonedxhelpers/decoder.go +++ b/internal/formats/common/cyclonedxhelpers/decoder.go @@ -49,7 +49,7 @@ func toSyftModel(bom *cyclonedx.BOM) (*sbom.SBOM, error) { } s := &sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: pkg.NewCollection(), + Packages: pkg.NewCollection(), LinuxDistribution: linuxReleaseFromComponents(*bom.Components), }, Source: meta, @@ -86,7 +86,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str idMap[component.BOMRef] = p // TODO there must be a better way than needing to call this manually: p.SetID() - s.Artifacts.PackageCatalog.Add(*p) + s.Artifacts.Packages.Add(*p) } if component.Components != nil { diff --git a/internal/formats/common/cyclonedxhelpers/decoder_test.go b/internal/formats/common/cyclonedxhelpers/decoder_test.go index e18e89f0a..e99d4dec3 100644 --- a/internal/formats/common/cyclonedxhelpers/decoder_test.go +++ b/internal/formats/common/cyclonedxhelpers/decoder_test.go @@ -212,7 +212,7 @@ func Test_decode(t *testing.T) { assert.Equal(t, e.ver, sbom.Artifacts.LinuxDistribution.VersionID) } if e.pkg != "" { - for p := range sbom.Artifacts.PackageCatalog.Enumerate() { + for p := range sbom.Artifacts.Packages.Enumerate() { if e.pkg != p.Name { continue } @@ -240,7 +240,7 @@ func Test_decode(t *testing.T) { if e.relation != "" { foundRelation := false for _, r := range sbom.Relationships { - p := sbom.Artifacts.PackageCatalog.Package(r.To.ID()) + p := sbom.Artifacts.Packages.Package(r.To.ID()) if e.relation == p.Name { foundRelation = true break diff --git a/internal/formats/common/cyclonedxhelpers/format.go b/internal/formats/common/cyclonedxhelpers/format.go index ded8be88d..660b79b9e 100644 --- a/internal/formats/common/cyclonedxhelpers/format.go +++ b/internal/formats/common/cyclonedxhelpers/format.go @@ -25,7 +25,7 @@ func ToFormatModel(s sbom.SBOM) *cyclonedx.BOM { cdxBOM.SerialNumber = uuid.New().URN() cdxBOM.Metadata = toBomDescriptor(internal.ApplicationName, versionInfo.Version, s.Source) - packages := s.Artifacts.PackageCatalog.Sorted() + packages := s.Artifacts.Packages.Sorted() components := make([]cyclonedx.Component, len(packages)) for i, p := range packages { components[i] = encodeComponent(p) diff --git a/internal/formats/common/spdxhelpers/to_syft_model.go b/internal/formats/common/spdxhelpers/to_syft_model.go index 9c7b843a0..eceb98423 100644 --- a/internal/formats/common/spdxhelpers/to_syft_model.go +++ b/internal/formats/common/spdxhelpers/to_syft_model.go @@ -21,7 +21,7 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) { s := &sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: pkg.NewCollection(), + Packages: pkg.NewCollection(), FileMetadata: map[file.Coordinates]file.Metadata{}, FileDigests: map[file.Coordinates][]file.Digest{}, LinuxDistribution: findLinuxReleaseByPURL(doc), @@ -74,7 +74,7 @@ func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *sp for _, p := range doc.Packages { syftPkg := toSyftPackage(p) spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg - s.Artifacts.PackageCatalog.Add(*syftPkg) + s.Artifacts.Packages.Add(*syftPkg) } } diff --git a/internal/formats/common/spdxhelpers/to_syft_model_test.go b/internal/formats/common/spdxhelpers/to_syft_model_test.go index 9864c5bef..efe805d2a 100644 --- a/internal/formats/common/spdxhelpers/to_syft_model_test.go +++ b/internal/formats/common/spdxhelpers/to_syft_model_test.go @@ -94,7 +94,7 @@ func TestToSyftModel(t *testing.T) { assert.NotNil(t, sbom) - pkgs := sbom.Artifacts.PackageCatalog.Sorted() + pkgs := sbom.Artifacts.Packages.Sorted() assert.Len(t, pkgs, 2) diff --git a/internal/formats/common/testutils/utils.go b/internal/formats/common/testutils/utils.go index b2cb6edc0..f416e96b7 100644 --- a/internal/formats/common/testutils/utils.go +++ b/internal/formats/common/testutils/utils.go @@ -124,7 +124,7 @@ func ImageInput(t testing.TB, testImage string, options ...ImageOption) sbom.SBO return sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: catalog, + Packages: catalog, LinuxDistribution: &linux.Release{ PrettyName: "debian", Name: "debian", @@ -152,7 +152,7 @@ func carriageRedactor(s []byte) []byte { return []byte(msg) } -func populateImageCatalog(catalog *pkg.Collection, img *image.Image) { +func populateImageCatalog(catalog pkg.Collection, img *image.Image) { _, ref1, _ := img.SquashedTree().File("/somefile-1.txt", filetree.FollowBasenameLinks) _, ref2, _ := img.SquashedTree().File("/somefile-2.txt", filetree.FollowBasenameLinks) @@ -205,7 +205,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM { return sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: catalog, + Packages: catalog, LinuxDistribution: &linux.Release{ PrettyName: "debian", Name: "debian", @@ -228,7 +228,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM { } } -func newDirectoryCatalog() *pkg.Collection { +func newDirectoryCatalog() pkg.Collection { catalog := pkg.NewCollection() // populate catalog with test data diff --git a/internal/formats/cyclonedxjson/decoder_test.go b/internal/formats/cyclonedxjson/decoder_test.go index e561ff137..f969732a1 100644 --- a/internal/formats/cyclonedxjson/decoder_test.go +++ b/internal/formats/cyclonedxjson/decoder_test.go @@ -57,7 +57,7 @@ func Test_decodeJSON(t *testing.T) { split = strings.SplitN(pkg, ":", 2) name = split[0] version = split[1] - for p := range bom.Artifacts.PackageCatalog.Enumerate() { + for p := range bom.Artifacts.Packages.Enumerate() { if p.Name == name { assert.Equal(t, version, p.Version) continue pkgs diff --git a/internal/formats/cyclonedxxml/decoder_test.go b/internal/formats/cyclonedxxml/decoder_test.go index 7a6643339..ca0622abc 100644 --- a/internal/formats/cyclonedxxml/decoder_test.go +++ b/internal/formats/cyclonedxxml/decoder_test.go @@ -57,7 +57,7 @@ func Test_decodeXML(t *testing.T) { split = strings.SplitN(pkg, ":", 2) name = split[0] version = split[1] - for p := range bom.Artifacts.PackageCatalog.Enumerate() { + for p := range bom.Artifacts.Packages.Enumerate() { if p.Name == name { assert.Equal(t, version, p.Version) continue pkgs diff --git a/internal/formats/spdx22json/decoder_test.go b/internal/formats/spdx22json/decoder_test.go index 6f718dc4d..bac074b43 100644 --- a/internal/formats/spdx22json/decoder_test.go +++ b/internal/formats/spdx22json/decoder_test.go @@ -71,11 +71,11 @@ func TestSPDXJSONDecoder(t *testing.T) { } if test.packages != nil { - assert.Equal(t, sbom.Artifacts.PackageCatalog.PackageCount(), len(test.packages)) + assert.Equal(t, sbom.Artifacts.Packages.Size(), len(test.packages)) packages: for _, pkgName := range test.packages { - for _, p := range sbom.Artifacts.PackageCatalog.Sorted() { + for _, p := range sbom.Artifacts.Packages.Sorted() { if p.Name == pkgName { continue packages } diff --git a/internal/formats/spdx22json/to_format_model.go b/internal/formats/spdx22json/to_format_model.go index dda31f293..bdae56f55 100644 --- a/internal/formats/spdx22json/to_format_model.go +++ b/internal/formats/spdx22json/to_format_model.go @@ -42,13 +42,13 @@ func toFormatModel(s sbom.SBOM) (*model.Document, error) { }, DataLicense: "CC0-1.0", DocumentNamespace: namespace, - Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships), + Packages: toPackages(s.Artifacts.Packages, s.Relationships), Files: toFiles(s), Relationships: toRelationships(s.Relationships), }, nil } -func toPackages(catalog *pkg.Collection, relationships []artifact.Relationship) []model.Package { +func toPackages(catalog pkg.Collection, relationships []artifact.Relationship) []model.Package { packages := make([]model.Package, 0) for _, p := range catalog.Sorted() { diff --git a/internal/formats/spdx22tagvalue/to_format_model.go b/internal/formats/spdx22tagvalue/to_format_model.go index ed11ce7b6..e0cb530d3 100644 --- a/internal/formats/spdx22tagvalue/to_format_model.go +++ b/internal/formats/spdx22tagvalue/to_format_model.go @@ -85,13 +85,13 @@ func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) { // Cardinality: optional, one DocumentComment: "", }, - Packages: toFormatPackages(s.Artifacts.PackageCatalog), + Packages: toFormatPackages(s.Artifacts.Packages), }, nil } // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/) // nolint: funlen -func toFormatPackages(catalog *pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 { +func toFormatPackages(catalog pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 { results := make(map[spdx.ElementID]*spdx.Package2_2) for _, p := range catalog.Sorted() { diff --git a/internal/formats/syftjson/decoder_test.go b/internal/formats/syftjson/decoder_test.go index 3dc069fc2..d1becec6b 100644 --- a/internal/formats/syftjson/decoder_test.go +++ b/internal/formats/syftjson/decoder_test.go @@ -28,8 +28,8 @@ func TestEncodeDecodeCycle(t *testing.T) { t.Errorf("metadata difference: %+v", d) } - actualPackages := actualSBOM.Artifacts.PackageCatalog.Sorted() - for idx, p := range originalSBOM.Artifacts.PackageCatalog.Sorted() { + actualPackages := actualSBOM.Artifacts.Packages.Sorted() + for idx, p := range originalSBOM.Artifacts.Packages.Sorted() { if !assert.Equal(t, p.Name, actualPackages[idx].Name) { t.Errorf("different package at idx=%d: %s vs %s", idx, p.Name, actualPackages[idx].Name) continue diff --git a/internal/formats/syftjson/encoder_test.go b/internal/formats/syftjson/encoder_test.go index dd869e310..9a3babf05 100644 --- a/internal/formats/syftjson/encoder_test.go +++ b/internal/formats/syftjson/encoder_test.go @@ -95,7 +95,7 @@ func TestEncodeFullJSONDocument(t *testing.T) { s := sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: catalog, + Packages: catalog, FileMetadata: map[file.Coordinates]file.Metadata{ file.NewLocation("/a/place").Coordinates: { Mode: 0775, diff --git a/internal/formats/syftjson/to_format_model.go b/internal/formats/syftjson/to_format_model.go index 9002a4aeb..b9971ea68 100644 --- a/internal/formats/syftjson/to_format_model.go +++ b/internal/formats/syftjson/to_format_model.go @@ -31,7 +31,7 @@ func ToFormatModel(s sbom.SBOM) model.Document { } return model.Document{ - Artifacts: toPackageModels(s.Artifacts.PackageCatalog), + Artifacts: toPackageModels(s.Artifacts.Packages), ArtifactRelationships: toRelationshipModel(s.Relationships), Files: toFile(s), Secrets: toSecrets(s.Artifacts.Secrets), @@ -153,7 +153,7 @@ func toFileMetadataEntry(coordinates file.Coordinates, metadata *file.Metadata) } } -func toPackageModels(catalog *pkg.Collection) []model.Package { +func toPackageModels(catalog pkg.Collection) []model.Package { artifacts := make([]model.Package, 0) if catalog == nil { return artifacts diff --git a/internal/formats/syftjson/to_syft_model.go b/internal/formats/syftjson/to_syft_model.go index a9d4c361a..e297d2026 100644 --- a/internal/formats/syftjson/to_syft_model.go +++ b/internal/formats/syftjson/to_syft_model.go @@ -18,7 +18,7 @@ func toSyftModel(doc model.Document) (*sbom.SBOM, error) { return &sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: catalog, + Packages: catalog, LinuxDistribution: toSyftLinuxRelease(doc.Distro), }, Source: *toSyftSourceData(doc.Source), @@ -48,7 +48,7 @@ func toSyftLinuxRelease(d model.LinuxRelease) *linux.Release { } } -func toSyftRelationships(doc *model.Document, catalog *pkg.Collection, relationships []model.Relationship) []artifact.Relationship { +func toSyftRelationships(doc *model.Document, catalog pkg.Collection, relationships []model.Relationship) []artifact.Relationship { idMap := make(map[string]interface{}) for _, p := range catalog.Sorted() { @@ -130,7 +130,7 @@ func toSyftSourceData(s model.Source) *source.Metadata { return nil } -func toSyftCatalog(pkgs []model.Package) *pkg.Collection { +func toSyftCatalog(pkgs []model.Package) pkg.Collection { catalog := pkg.NewCollection() for _, p := range pkgs { catalog.Add(toSyftPackage(p)) diff --git a/internal/formats/table/encoder.go b/internal/formats/table/encoder.go index e651674c5..81cf5b60a 100644 --- a/internal/formats/table/encoder.go +++ b/internal/formats/table/encoder.go @@ -15,7 +15,7 @@ func encoder(output io.Writer, s sbom.SBOM) error { var rows [][]string columns := []string{"Name", "Version", "Type"} - for _, p := range s.Artifacts.PackageCatalog.Sorted() { + for _, p := range s.Artifacts.Packages.Sorted() { row := []string{ p.Name, p.Version, diff --git a/internal/formats/text/encoder.go b/internal/formats/text/encoder.go index 33c1cefa8..6f6ef9b8d 100644 --- a/internal/formats/text/encoder.go +++ b/internal/formats/text/encoder.go @@ -35,7 +35,7 @@ func encoder(output io.Writer, s sbom.SBOM) error { // populate artifacts... rows := 0 - for _, p := range s.Artifacts.PackageCatalog.Sorted() { + for _, p := range s.Artifacts.Packages.Sorted() { fmt.Fprintf(w, "[%s]\n", p.Name) fmt.Fprintln(w, " Version:\t", p.Version) fmt.Fprintln(w, " Type:\t", string(p.Type)) diff --git a/syft/catalog.go b/syft/catalog.go index c9f568da8..4a0c8ad50 100644 --- a/syft/catalog.go +++ b/syft/catalog.go @@ -2,13 +2,28 @@ package syft import ( "fmt" - + "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/event" + "github.com/anchore/syft/syft/event/monitor" + "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" "github.com/hashicorp/go-multierror" + "github.com/wagoodman/go-partybus" + "github.com/wagoodman/go-progress" ) +type monitorableCollection struct { + pkg.Collection + monitor *progress.Manual +} + +func (m *monitorableCollection) Add(p pkg.Package) { + m.monitor.N++ + m.Collection.Add(p) +} + func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error) { var config = DefaultCatalogingConfig() for _, optFn := range options { @@ -17,28 +32,60 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error } } - var tasks []task - - generators := []taskGenerator{ - generatePackagesCatalogingTask, - generateFileMetadataCatalogingTask, - generateFileDigestsCatalogingTask, - generateSecretsCatalogingTask, - generateFileClassifierTask, - generateContentsCatalogingTask, + if config.availableTasks == nil { + config.availableTasks = newTaskCollection() } - for _, generator := range generators { - t, err := generator(config) - if err != nil { - return nil, fmt.Errorf("unable to create cataloging task: %w", err) - } + tc := config.availableTasks + if err := tc.addAllCatalogers(config); err != nil { + return nil, fmt.Errorf("unable to register catalogers: %w", err) + } - if t != nil { - tasks = append(tasks, t) + var catalogingTasks []task + + if len(config.EnabledCatalogers) == 0 { + switch src.Metadata.Scheme { + case source.ImageType: + catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel, installedTaskLabel)...) + case source.FileType: + catalogingTasks = tc.tasks(tc.all()...) + case source.DirectoryType: + // TODO: it looks like gemspec was left out on main, is this intentional? if so it's not accounted for here... + catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel)...) } } + if len(catalogingTasks) == 0 { + return nil, fmt.Errorf("no cataloging tasks configured to run") + } + + // special case: we need to identify the linux distro for downstream processing + identifyLinuxDistroTask, err := newIdentifyDistroTask(config) + if err != nil { + return nil, fmt.Errorf("unable to create linux distro identification task: %+v", err) + } + + synthesizePackageRelationshipsTask, err := newSynthesizePackageRelationshipsTasks(config) + if err != nil { + return nil, fmt.Errorf("unable to create task to synthesize package relationships: %+v", err) + } + + taskGroups := [][]task{ + { + identifyLinuxDistroTask, + }, + catalogingTasks, + { + synthesizePackageRelationshipsTask, + }, + } + + files, pkgs := newCatalogerMonitor() + defer func() { + files.SetCompleted() // TODO: files monitor is unused... should we remove? + pkgs.SetCompleted() + }() + s := sbom.SBOM{ Source: src.Metadata, Descriptor: sbom.Descriptor{ @@ -46,12 +93,39 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error Version: config.ToolVersion, Configuration: config.ToolConfiguration, }, + Artifacts: sbom.Artifacts{ + Packages: &monitorableCollection{ + Collection: pkg.NewCollection(), + monitor: pkgs, + }, + }, } - return &s, runTasks(&s, src, tasks, config.ProcessTasksInSerial) + for _, tasks := range taskGroups { + if err := runTasks(&s, src, config.ProcessTasksInSerial, tasks...); err != nil { + return &s, err + } + } + + return &s, nil } -func runTasks(s *sbom.SBOM, src *source.Source, tasks []task, serial bool) error { +// newCatalogerMonitor creates a new CatalogingMonitor object and publishes the object on the bus as a CatalogingStarted event. +func newCatalogerMonitor() (*progress.Manual, *progress.Manual) { + filesProcessed := progress.Manual{} + packagesDiscovered := progress.Manual{} + + bus.Publish(partybus.Event{ + Type: event.CatalogingStarted, + Value: monitor.CatalogingMonitor{ + FilesProcessed: progress.Monitorable(&filesProcessed), + PackagesDiscovered: progress.Monitorable(&packagesDiscovered), + }, + }) + return &filesProcessed, &packagesDiscovered +} + +func runTasks(s *sbom.SBOM, src *source.Source, serial bool, tasks ...task) error { var relationships []<-chan artifact.Relationship var errs = make(chan error) for _, t := range tasks { @@ -92,7 +166,7 @@ func mergeErrors(errs <-chan error) (allErrs error) { func runTask(t task, a *sbom.Artifacts, src *source.Source, r chan<- artifact.Relationship, errs chan<- error) { defer close(r) - relationships, err := t(a, src) + relationships, err := t.Run(a, src) if err != nil { errs <- err return diff --git a/syft/cataloger/files/filecontents/cataloger.go b/syft/cataloger/files/filecontents/cataloger.go index 5357213d6..9bdb7b353 100644 --- a/syft/cataloger/files/filecontents/cataloger.go +++ b/syft/cataloger/files/filecontents/cataloger.go @@ -11,23 +11,23 @@ import ( "github.com/anchore/syft/syft/file" ) -type CatalogerConfig struct { +type Config struct { Globs []string SkipFilesAboveSizeInBytes int64 } type Cataloger struct { - config CatalogerConfig + config Config } -func DefaultCatalogerConfig() CatalogerConfig { - return CatalogerConfig{ +func DefaultConfig() Config { + return Config{ Globs: nil, SkipFilesAboveSizeInBytes: 1 * file.MB, } } -func NewCataloger(config CatalogerConfig) (*Cataloger, error) { +func NewCataloger(config Config) (*Cataloger, error) { return &Cataloger{ config: config, }, nil diff --git a/syft/cataloger/files/filecontents/cataloger_test.go b/syft/cataloger/files/filecontents/cataloger_test.go index 3cd51ac3d..e498c7995 100644 --- a/syft/cataloger/files/filecontents/cataloger_test.go +++ b/syft/cataloger/files/filecontents/cataloger_test.go @@ -66,7 +66,7 @@ func TestContentsCataloger(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - c, err := NewCataloger(CatalogerConfig{ + c, err := NewCataloger(Config{ Globs: test.globs, SkipFilesAboveSizeInBytes: test.maxSize, }) diff --git a/syft/cataloger/files/secrets/cataloger.go b/syft/cataloger/files/secrets/cataloger.go index 2e5dcef86..ff3879d40 100644 --- a/syft/cataloger/files/secrets/cataloger.go +++ b/syft/cataloger/files/secrets/cataloger.go @@ -27,30 +27,30 @@ var DefaultSecretsPatterns = map[string]string{ "generic-api-key": `(?i)api(-|_)?key["'=:\s]*?(?P[A-Z0-9]{20,60})["']?(\s|$)`, } -type CatalogerConfig struct { +type Config struct { Patterns map[string]*regexp.Regexp RevealValues bool MaxFileSize int64 } type Cataloger struct { - config CatalogerConfig + config Config } -func DefaultCatalogerConfig() CatalogerConfig { +func DefaultConfig() Config { patterns, err := file.GenerateSearchPatterns(DefaultSecretsPatterns, nil, nil) if err != nil { patterns = make(map[string]*regexp.Regexp) log.Errorf("unable to create default secrets config: %w", err) } - return CatalogerConfig{ + return Config{ Patterns: patterns, RevealValues: false, MaxFileSize: 1 * file.MB, } } -func NewCataloger(config CatalogerConfig) (*Cataloger, error) { +func NewCataloger(config Config) (*Cataloger, error) { return &Cataloger{ config: config, }, nil diff --git a/syft/cataloger/files/secrets/cataloger_test.go b/syft/cataloger/files/secrets/cataloger_test.go index a72a7d74a..e4f3307fb 100644 --- a/syft/cataloger/files/secrets/cataloger_test.go +++ b/syft/cataloger/files/secrets/cataloger_test.go @@ -174,7 +174,7 @@ func TestSecretsCataloger(t *testing.T) { regexObjs[name] = obj } - c, err := NewCataloger(CatalogerConfig{ + c, err := NewCataloger(Config{ Patterns: regexObjs, RevealValues: test.reveal, MaxFileSize: test.maxSize, @@ -420,7 +420,7 @@ j4f668YfhUbKdRF6S6734856 for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - c, err := NewCataloger(CatalogerConfig{ + c, err := NewCataloger(Config{ Patterns: regexObjs, RevealValues: true, MaxFileSize: 10 * file.MB, diff --git a/syft/cataloger/id.go b/syft/cataloger/id.go new file mode 100644 index 000000000..7da59f434 --- /dev/null +++ b/syft/cataloger/id.go @@ -0,0 +1,40 @@ +package cataloger + +const ( + ApkDBID ID = "os-apkdb" + DpkgID ID = "os-dpkg" + RpmDBID ID = "os-rpmdb" + RubyGemspecID ID = "ruby-gem-spec" + RubyGemfileLockID ID = "ruby-gem-file-lock" + PythonPackageID ID = "python-package" + PythonRequirementsID ID = "python-requirements" + PythonPoetryID ID = "python-poetry" + PythonSetupID ID = "python-setup" + PythonPipFileID ID = "python-pipfile" + JavascriptPackageJSONID ID = "javascript-package-json" + JavascriptPackageLockID ID = "javascript-package-lock" + JavaScriptYarnLockID ID = "javascript-yarn-lock" + JavaArchiveID ID = "java-archive" + GoModID ID = "go-mod" + GoBinaryID ID = "go-binary" + RustCargoLockID ID = "rust-cargo-lock" + PHPInstalledJSONID ID = "php-installed-json" + PHPComposerLockID ID = "php-composer-lock" + + FileMetadataID ID = "file-metadata" + FileDigestsID ID = "file-digest" + SecretsID ID = "secrets" + FileClassifierID ID = "file-classifier" + FileContentsID ID = "file-content" +) + +type ID string +type IDs []ID + +func (c IDs) Len() int { return len(c) } + +func (c IDs) Swap(i, j int) { c[i], c[j] = c[j], c[i] } + +func (c IDs) Less(i, j int) bool { + return c[i] < c[j] +} diff --git a/syft/cataloger/packages/catalog.go b/syft/cataloger/packages/catalog.go deleted file mode 100644 index 56860fec7..000000000 --- a/syft/cataloger/packages/catalog.go +++ /dev/null @@ -1,126 +0,0 @@ -package packages - -import ( - "fmt" - "github.com/anchore/syft/syft/pkg" - - "github.com/anchore/syft/internal/bus" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/event" - "github.com/anchore/syft/syft/event/monitor" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/speculate/cpes" - "github.com/hashicorp/go-multierror" - "github.com/wagoodman/go-partybus" - "github.com/wagoodman/go-progress" -) - -// Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages. -// In order to efficiently retrieve contents from an underlying container image the content fetch requests are -// done in bulk. Specifically, all files of interest are collected from each cataloger and accumulated into a single -// request. -func Catalog(resolver file.Resolver, release *linux.Release, catalogers ...pkg.Cataloger) (*pkg.Collection, []artifact.Relationship, error) { - catalog := pkg.NewCollection() - var allRelationships []artifact.Relationship - - filesProcessed, packagesDiscovered := newPackageCatalogerMonitor() - - // perform analysis, accumulating errors for each failed analysis - var errs error - for _, c := range catalogers { - // find packages from the underlying raw data - log.Debugf("cataloging with %q", c.Name()) - packages, relationships, err := c.Catalog(resolver) - if err != nil { - errs = multierror.Append(errs, err) - continue - } - - catalogedPackages := len(packages) - - log.Debugf("discovered %d packages", catalogedPackages) - packagesDiscovered.N += int64(catalogedPackages) - - for _, p := range packages { - // generate CPEs (note: this is excluded from package ID, so is safe to mutate) - p.CPEs = cpes.Generate(p) - - // generate PURL (note: this is excluded from package ID, so is safe to mutate) - p.PURL = pkg.URL(p, release) - - // create file-to-package relationships for files owned by the package - owningRelationships, err := packageFileOwnershipRelationships(p, resolver) - if err != nil { - log.Warnf("unable to create any package-file relationships for package name=%q: %w", p.Name, err) - } else { - allRelationships = append(allRelationships, owningRelationships...) - } - - // add to catalog - catalog.Add(p) - } - - allRelationships = append(allRelationships, relationships...) - } - - allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...) - - if errs != nil { - return nil, nil, errs - } - - filesProcessed.SetCompleted() - packagesDiscovered.SetCompleted() - - return catalog, allRelationships, nil -} - -func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { - fileOwner, ok := p.Metadata.(pkg.FileOwner) - if !ok { - return nil, nil - } - - var relationships []artifact.Relationship - - for _, path := range fileOwner.OwnedFiles() { - locations, err := resolver.FilesByPath(path) - if err != nil { - return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) - } - - if len(locations) == 0 { - // ideally we want to warn users about missing files from a package, however, it is very common for - // container image authors to delete files that are not needed in order to keep image sizes small. Adding - // a warning here would be needlessly noisy (even for popular base images). - continue - } - - for _, l := range locations { - relationships = append(relationships, artifact.Relationship{ - From: p, - To: l.Coordinates, - Type: artifact.ContainsRelationship, - }) - } - } - - return relationships, nil -} - -// newPackageCatalogerMonitor creates a new PackageCatalogerMonitor object and publishes the object on the bus as a PackageCatalogerStarted event. -func newPackageCatalogerMonitor() (*progress.Manual, *progress.Manual) { - filesProcessed := progress.Manual{} - packagesDiscovered := progress.Manual{} - - bus.Publish(partybus.Event{ - Type: event.PackageCatalogerStarted, - Value: monitor.PackageCatalogerMonitor{ - FilesProcessed: progress.Monitorable(&filesProcessed), - PackagesDiscovered: progress.Monitorable(&packagesDiscovered), - }, - }) - return &filesProcessed, &packagesDiscovered -} diff --git a/syft/cataloger/packages/cataloger_groups.go b/syft/cataloger/packages/cataloger_groups.go deleted file mode 100644 index 7921f1c85..000000000 --- a/syft/cataloger/packages/cataloger_groups.go +++ /dev/null @@ -1,82 +0,0 @@ -package packages - -import ( - "github.com/anchore/syft/syft/cataloger/packages/apkdb" - "github.com/anchore/syft/syft/cataloger/packages/deb" - "github.com/anchore/syft/syft/cataloger/packages/golang" - "github.com/anchore/syft/syft/cataloger/packages/java" - "github.com/anchore/syft/syft/cataloger/packages/javascript" - "github.com/anchore/syft/syft/cataloger/packages/php" - "github.com/anchore/syft/syft/cataloger/packages/python" - "github.com/anchore/syft/syft/cataloger/packages/rpmdb" - "github.com/anchore/syft/syft/cataloger/packages/ruby" - "github.com/anchore/syft/syft/cataloger/packages/rust" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/source" -) - -// TODO: add tag-based API to select appropriate package catalogers for different scenarios - -// AllCatalogers returns all implemented package catalogers -func AllCatalogers(cfg SearchConfig) []pkg.Cataloger { - return []pkg.Cataloger{ - ruby.NewGemFileLockCataloger(), - ruby.NewGemSpecCataloger(), - python.NewPythonIndexCataloger(), - python.NewPythonPackageCataloger(), - javascript.NewJavascriptLockCataloger(), - javascript.NewJavascriptPackageCataloger(), - deb.NewDpkgdbCataloger(), - rpmdb.NewRpmdbCataloger(), - java.NewJavaCataloger(cfg.Java()), - apkdb.NewApkdbCataloger(), - golang.NewGoModuleBinaryCataloger(), - golang.NewGoModFileCataloger(), - rust.NewCargoLockCataloger(), - } -} - -// InstalledCatalogers returns a slice of locally implemented package catalogers that are fit for detecting installations of packages. -func InstalledCatalogers(cfg SearchConfig) []pkg.Cataloger { - return []pkg.Cataloger{ - ruby.NewGemSpecCataloger(), - python.NewPythonPackageCataloger(), - php.NewPHPComposerInstalledCataloger(), - javascript.NewJavascriptPackageCataloger(), - deb.NewDpkgdbCataloger(), - rpmdb.NewRpmdbCataloger(), - java.NewJavaCataloger(cfg.Java()), - apkdb.NewApkdbCataloger(), - golang.NewGoModuleBinaryCataloger(), - } -} - -// IndexCatalogers returns a slice of locally implemented package catalogers that are fit for detecting packages from index files (and select installations) -func IndexCatalogers(cfg SearchConfig) []pkg.Cataloger { - return []pkg.Cataloger{ - ruby.NewGemFileLockCataloger(), - python.NewPythonIndexCataloger(), - python.NewPythonPackageCataloger(), // for install - php.NewPHPComposerLockCataloger(), - javascript.NewJavascriptLockCataloger(), - deb.NewDpkgdbCataloger(), // for install - rpmdb.NewRpmdbCataloger(), // for install - java.NewJavaCataloger(cfg.Java()), // for install - apkdb.NewApkdbCataloger(), // for install - golang.NewGoModuleBinaryCataloger(), // for install - golang.NewGoModFileCataloger(), - rust.NewCargoLockCataloger(), - } -} - -func CatalogersBySourceScheme(scheme source.Type, cfg SearchConfig) []pkg.Cataloger { - switch scheme { - case source.ImageType: - return InstalledCatalogers(cfg) - case source.FileType: - return AllCatalogers(cfg) - case source.DirectoryType: - return IndexCatalogers(cfg) - } - return nil -} diff --git a/syft/cataloger/packages/find_relationships.go b/syft/cataloger/packages/find_relationships.go new file mode 100644 index 000000000..e792ae8ff --- /dev/null +++ b/syft/cataloger/packages/find_relationships.go @@ -0,0 +1,24 @@ +package packages + +import ( + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" +) + +func FindRelationships(catalog pkg.Collection, resolver file.Resolver) []artifact.Relationship { + var allRelationships []artifact.Relationship + for p := range catalog.Enumerate() { + relationships, err := createFileOwnershipRelationships(p, resolver) + if err != nil { + log.Warnf("unable to create any package-file relationships for package name=%q: %w", p.Name, err) + continue + } + allRelationships = append(allRelationships, relationships...) + } + + allRelationships = append(allRelationships, findOwnershipByFileOverlapRelationship(catalog)...) + + return allRelationships +} diff --git a/syft/cataloger/packages/javascript/cataloger.go b/syft/cataloger/packages/javascript/cataloger.go index dee7a39aa..29b07d705 100644 --- a/syft/cataloger/packages/javascript/cataloger.go +++ b/syft/cataloger/packages/javascript/cataloger.go @@ -13,15 +13,22 @@ func NewJavascriptPackageCataloger() *generic.Cataloger { "**/package.json": parsePackageJSON, } - return generic.NewCataloger(nil, globParsers, "javascript-package-cataloger") + return generic.NewCataloger(nil, globParsers, "javascript-package-json-cataloger") } // NewJavascriptLockCataloger returns a new Javascript cataloger object base on package lock files. -func NewJavascriptLockCataloger() *generic.Cataloger { +func NewJavascriptPackageLockCataloger() *generic.Cataloger { globParsers := map[string]generic.Parser{ "**/package-lock.json": parsePackageLock, - "**/yarn.lock": parseYarnLock, } - return generic.NewCataloger(nil, globParsers, "javascript-lock-cataloger") + return generic.NewCataloger(nil, globParsers, "javascript-package-lock-cataloger") +} + +func NewJavascriptYarnLockCataloger() *generic.Cataloger { + globParsers := map[string]generic.Parser{ + "**/yarn.lock": parseYarnLock, + } + + return generic.NewCataloger(nil, globParsers, "javascript-yarn-lock-cataloger") } diff --git a/syft/cataloger/packages/python/index_cataloger.go b/syft/cataloger/packages/python/index_cataloger.go index cf80c7b46..b669f5291 100644 --- a/syft/cataloger/packages/python/index_cataloger.go +++ b/syft/cataloger/packages/python/index_cataloger.go @@ -8,13 +8,34 @@ import ( ) // NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. -func NewPythonIndexCataloger() *generic.Cataloger { +func NewPythonRequirementsCataloger() *generic.Cataloger { globParsers := map[string]generic.Parser{ "**/*requirements*.txt": parseRequirementsTxt, - "**/poetry.lock": parsePoetryLock, - "**/Pipfile.lock": parsePipfileLock, - "**/setup.py": parseSetup, } - return generic.NewCataloger(nil, globParsers, "python-index-cataloger") + return generic.NewCataloger(nil, globParsers, "python-requirements-cataloger") +} + +func NewPythonPoetryCataloger() *generic.Cataloger { + globParsers := map[string]generic.Parser{ + "**/poetry.lock": parsePoetryLock, + } + + return generic.NewCataloger(nil, globParsers, "python-poetry-cataloger") +} + +func NewPythonPipfileCataloger() *generic.Cataloger { + globParsers := map[string]generic.Parser{ + "**/Pipfile.lock": parsePipfileLock, + } + + return generic.NewCataloger(nil, globParsers, "python-pipfile-cataloger") +} + +func NewPythonSetupCataloger() *generic.Cataloger { + globParsers := map[string]generic.Parser{ + "**/setup.py": parseSetup, + } + + return generic.NewCataloger(nil, globParsers, "python-setup-cataloger") } diff --git a/syft/pkg/relationships_by_file_ownership.go b/syft/cataloger/packages/relationships_by_file_ownership.go similarity index 60% rename from syft/pkg/relationships_by_file_ownership.go rename to syft/cataloger/packages/relationships_by_file_ownership.go index 019b17cf5..e3be198fd 100644 --- a/syft/pkg/relationships_by_file_ownership.go +++ b/syft/cataloger/packages/relationships_by_file_ownership.go @@ -1,8 +1,11 @@ -package pkg +package packages import ( + "fmt" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" "github.com/bmatcuk/doublestar/v4" "github.com/scylladb/go-set/strset" ) @@ -10,9 +13,9 @@ import ( var globsForbiddenFromBeingOwned = []string{ // any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the // RPM DB, so if not ignored that package would own all other packages on the system). - ApkDBGlob, - DpkgDBGlob, - RpmDBGlob, + pkg.ApkDBGlob, + pkg.DpkgDBGlob, + pkg.RpmDBGlob, // DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership. "/usr/share/doc/**/copyright", } @@ -21,17 +24,50 @@ type ownershipByFilesMetadata struct { Files []string `json:"files"` } -// RelationshipsByFileOwnership creates a package-to-package relationship based on discovering which packages have +func createFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { + fileOwner, ok := p.Metadata.(pkg.FileOwner) + if !ok { + return nil, nil + } + + var relationships []artifact.Relationship + + for _, path := range fileOwner.OwnedFiles() { + locations, err := resolver.FilesByPath(path) + if err != nil { + return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) + } + + if len(locations) == 0 { + // ideally we want to warn users about missing files from a package, however, it is very common for + // container image authors to delete files that are not needed in order to keep image sizes small. Adding + // a warning here would be needlessly noisy (even for popular base images). + continue + } + + for _, l := range locations { + relationships = append(relationships, artifact.Relationship{ + From: p, + To: l.Coordinates, + Type: artifact.ContainsRelationship, + }) + } + } + + return relationships, nil +} + +// findOwnershipByFileOverlapRelationship creates a package-to-package relationship based on discovering which packages have // evidence locations that overlap with ownership claim from another package's package manager metadata. -func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship { - var relationships = findOwnershipByFilesRelationships(catalog) +func findOwnershipByFileOverlapRelationship(catalog pkg.Collection) []artifact.Relationship { + var relationships = findFilesWithDisputedOwnership(catalog) var edges []artifact.Relationship for parentID, children := range relationships { for childID, files := range children { edges = append(edges, artifact.Relationship{ - From: catalog.byID[parentID], - To: catalog.byID[childID], + From: catalog.Package(parentID), + To: catalog.Package(childID), Type: artifact.OwnershipByFileOverlapRelationship, Data: ownershipByFilesMetadata{ Files: files.List(), @@ -43,9 +79,9 @@ func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship { return edges } -// findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of +// findFilesWithDisputedOwnership find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of // a package is found to be owned by another (from the owner's .Metadata.Files[]). -func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[artifact.ID]*strset.Set { +func findFilesWithDisputedOwnership(catalog pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set) if catalog == nil { @@ -59,7 +95,7 @@ func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[ } // check to see if this is a file owner - pkgFileOwner, ok := candidateOwnerPkg.Metadata.(FileOwner) + pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner) if !ok { continue } diff --git a/syft/pkg/relationships_by_file_ownership_test.go b/syft/cataloger/packages/relationships_by_file_ownership_test.go similarity index 68% rename from syft/pkg/relationships_by_file_ownership_test.go rename to syft/cataloger/packages/relationships_by_file_ownership_test.go index 11cfdf618..809c3c3f5 100644 --- a/syft/pkg/relationships_by_file_ownership_test.go +++ b/syft/cataloger/packages/relationships_by_file_ownership_test.go @@ -1,7 +1,8 @@ -package pkg +package packages import ( "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" "testing" "github.com/anchore/syft/syft/artifact" @@ -12,20 +13,20 @@ func TestOwnershipByFilesRelationship(t *testing.T) { tests := []struct { name string - setup func(t testing.TB) ([]Package, []artifact.Relationship) + setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship) }{ { name: "owns-by-real-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/a/path", "/another/path"), file.NewVirtualLocation("/b/path", "/bee/path"), }, - Type: RpmPkg, - MetadataType: RpmdbMetadataType, - Metadata: RpmdbMetadata{ - Files: []RpmdbFileRecord{ + Type: pkg.RpmPkg, + MetadataType: pkg.RpmdbMetadataType, + Metadata: pkg.RpmdbMetadata{ + Files: []pkg.RpmdbFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: "/d/path"}, @@ -34,12 +35,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) { } parent.SetID() - child := Package{ + child := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/c/path", "/another/path"), file.NewVirtualLocation("/d/path", "/another/path"), }, - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() @@ -54,21 +55,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, } - return []Package{parent, child}, []artifact.Relationship{relationship} + return []pkg.Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "owns-by-virtual-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/a/path", "/some/other/path"), file.NewVirtualLocation("/b/path", "/bee/path"), }, - Type: RpmPkg, - MetadataType: RpmdbMetadataType, - Metadata: RpmdbMetadata{ - Files: []RpmdbFileRecord{ + Type: pkg.RpmPkg, + MetadataType: pkg.RpmdbMetadataType, + Metadata: pkg.RpmdbMetadata{ + Files: []pkg.RpmdbFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: "/another/path"}, @@ -77,12 +78,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) { } parent.SetID() - child := Package{ + child := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/c/path", "/another/path"), file.NewLocation("/d/path"), }, - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() @@ -96,21 +97,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) { }, }, } - return []Package{parent, child}, []artifact.Relationship{relationship} + return []pkg.Package{parent, child}, []artifact.Relationship{relationship} }, }, { name: "ignore-empty-path", - setup: func(t testing.TB) ([]Package, []artifact.Relationship) { - parent := Package{ + setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) { + parent := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/a/path", "/some/other/path"), file.NewVirtualLocation("/b/path", "/bee/path"), }, - Type: RpmPkg, - MetadataType: RpmdbMetadataType, - Metadata: RpmdbMetadata{ - Files: []RpmdbFileRecord{ + Type: pkg.RpmPkg, + MetadataType: pkg.RpmdbMetadataType, + Metadata: pkg.RpmdbMetadata{ + Files: []pkg.RpmdbFileRecord{ {Path: "/owning/path/1"}, {Path: "/owning/path/2"}, {Path: ""}, @@ -120,17 +121,17 @@ func TestOwnershipByFilesRelationship(t *testing.T) { parent.SetID() - child := Package{ + child := pkg.Package{ Locations: []file.Location{ file.NewVirtualLocation("/c/path", "/another/path"), file.NewLocation("/d/path"), }, - Type: NpmPkg, + Type: pkg.NpmPkg, } child.SetID() - return []Package{parent, child}, nil + return []pkg.Package{parent, child}, nil }, }, } @@ -138,8 +139,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { pkgs, expectedRelations := test.setup(t) - c := NewCollection(pkgs...) - relationships := RelationshipsByFileOwnership(c) + c := pkg.NewCollection(pkgs...) + relationships := findOwnershipByFileOverlapRelationship(c) assert.Len(t, relationships, len(expectedRelations)) for idx, expectedRelationship := range expectedRelations { diff --git a/syft/cataloger/packages/search_config.go b/syft/cataloger/packages/search_config.go index 8fba9deb5..9400b28e3 100644 --- a/syft/cataloger/packages/search_config.go +++ b/syft/cataloger/packages/search_config.go @@ -1,9 +1,5 @@ package packages -import ( - "github.com/anchore/syft/syft/cataloger/packages/java" -) - type SearchConfig struct { IncludeIndexedArchives bool IncludeUnindexedArchives bool @@ -15,10 +11,3 @@ func DefaultSearchConfig() SearchConfig { IncludeUnindexedArchives: false, } } - -func (c SearchConfig) Java() java.CatalogerConfig { - return java.CatalogerConfig{ - SearchUnindexedArchives: c.IncludeUnindexedArchives, - SearchIndexedArchives: c.IncludeIndexedArchives, - } -} diff --git a/syft/cataloging_config.go b/syft/cataloging_config.go index e8361187c..6a1d8419f 100644 --- a/syft/cataloging_config.go +++ b/syft/cataloging_config.go @@ -2,13 +2,13 @@ package syft import ( "crypto" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/version" + "github.com/anchore/syft/syft/cataloger" "github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/filecontents" "github.com/anchore/syft/syft/cataloger/files/secrets" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/version" - "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/cataloger/packages" "github.com/anchore/syft/syft/source" ) @@ -18,32 +18,32 @@ type CatalogingConfig struct { ToolVersion string ToolConfiguration interface{} // applies to all catalogers - Scope source.Scope - ProcessTasksInSerial bool + DefaultScope source.Scope // TODO: shouldn't this be in the package.SearchConfig? + ProcessTasksInSerial bool // TODO: this seems a little odd, if this should be an option is this the right spot? + EnabledCatalogers []cataloger.ID + availableTasks *taskCollection // package - PackageCatalogers []pkg.Cataloger + PackageSearch packages.SearchConfig // file metadata - CaptureFileMetadata bool - DigestHashes []crypto.Hash + DigestHashes []crypto.Hash // secrets - CaptureSecrets bool - SecretsConfig secrets.CatalogerConfig - SecretsScope source.Scope + SecretsSearch secrets.Config + SecretsScope source.Scope // file classification - ClassifyFiles bool FileClassifiers []fileclassifier.Classifier // file contents - ContentsConfig filecontents.CatalogerConfig + ContentsSearch filecontents.Config } func DefaultCatalogingConfig() CatalogingConfig { return CatalogingConfig{ - Scope: source.SquashedScope, + DefaultScope: source.SquashedScope, ToolName: internal.ApplicationName, ToolVersion: version.Guess(), SecretsScope: source.AllLayersScope, - SecretsConfig: secrets.DefaultCatalogerConfig(), + SecretsSearch: secrets.DefaultConfig(), FileClassifiers: fileclassifier.DefaultClassifiers(), - ContentsConfig: filecontents.DefaultCatalogerConfig(), + ContentsSearch: filecontents.DefaultConfig(), + PackageSearch: packages.DefaultSearchConfig(), } } diff --git a/syft/cataloging_option.go b/syft/cataloging_option.go index 2ec6c829b..09bda39c2 100644 --- a/syft/cataloging_option.go +++ b/syft/cataloging_option.go @@ -2,9 +2,9 @@ package syft import ( "crypto" + "github.com/anchore/syft/syft/cataloger" "github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/secrets" - "github.com/anchore/syft/syft/cataloger/packages" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" ) @@ -27,7 +27,7 @@ func WithoutConcurrency() CatalogingOption { func WithScope(scope source.Scope) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.Scope = scope + config.DefaultScope = scope return nil } } @@ -47,47 +47,55 @@ func WithToolConfiguration(c interface{}) CatalogingOption { } } -func WithPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption { +func WithCataloger(id cataloger.ID, c pkg.Cataloger) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.PackageCatalogers = catalogers - return nil + if config.availableTasks == nil { + config.availableTasks = newTaskCollection() + } + + var cfg CatalogingConfig + if config != nil { + cfg = *config + } + + return config.availableTasks.add(pkgCatalogerTask{ + id: id, + cataloger: c, + config: cfg, + }) } } -func WithAdditionalPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption { - return func(_ *source.Source, config *CatalogingConfig) error { - config.PackageCatalogers = append(config.PackageCatalogers, catalogers...) - return nil - } -} - -func WithDefaultPackageCatalogers(cfg packages.SearchConfig) CatalogingOption { +func WithDefaultCatalogers() CatalogingOption { return func(src *source.Source, config *CatalogingConfig) error { - config.PackageCatalogers = packages.CatalogersBySourceScheme(src.Metadata.Scheme, cfg) + // override any previously added catalogers + config.availableTasks = newTaskCollection() + config.EnabledCatalogers = nil return nil } } func WithFileMetadata() CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.CaptureFileMetadata = true + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileMetadataID) return nil } } func WithFileDigests(hashes ...crypto.Hash) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileDigestsID) config.DigestHashes = hashes return nil } } -func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption { +func WithSecrets(secretConfig *secrets.Config) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.CaptureSecrets = true + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.SecretsID) if secretConfig != nil { - config.SecretsConfig = *secretConfig + config.SecretsSearch = *secretConfig } return nil } @@ -95,30 +103,35 @@ func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption { func WithFileClassification() CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.ClassifyFiles = true + if len(config.FileClassifiers) > 0 { + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID) + } return nil } } func WithFileClassifiers(classifiers ...fileclassifier.Classifier) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.ClassifyFiles = !(len(classifiers) > 0) config.FileClassifiers = classifiers + if len(config.FileClassifiers) > 0 { + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID) + } return nil } } func WithFileContents(globs ...string) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.ContentsConfig.Globs = globs + config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileContentsID) + config.ContentsSearch.Globs = globs return nil } } func WithFileSizeLimit(byteLimit int64) CatalogingOption { return func(_ *source.Source, config *CatalogingConfig) error { - config.ContentsConfig.SkipFilesAboveSizeInBytes = byteLimit - config.SecretsConfig.MaxFileSize = byteLimit + config.ContentsSearch.SkipFilesAboveSizeInBytes = byteLimit + config.SecretsSearch.MaxFileSize = byteLimit return nil } } diff --git a/syft/event/event.go b/syft/event/event.go index c5ff81d31..c4de4d9b9 100644 --- a/syft/event/event.go +++ b/syft/event/event.go @@ -10,8 +10,8 @@ const ( // AppUpdateAvailable is a partybus event that occurs when an application update is available AppUpdateAvailable partybus.EventType = "syft-app-update-available" - // PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun - PackageCatalogerStarted partybus.EventType = "syft-package-cataloger-started-event" + // CatalogingStarted is a partybus event that occurs when the first cataloger has started + CatalogingStarted partybus.EventType = "syft-cataloging-started-event" // nolint:gosec // SecretsCatalogerStarted is a partybus event that occurs when the secrets cataloging has begun diff --git a/syft/event/monitor/package_cataloger_monitor.go b/syft/event/monitor/package_cataloger_monitor.go index 6bf7e5b89..2faf6283f 100644 --- a/syft/event/monitor/package_cataloger_monitor.go +++ b/syft/event/monitor/package_cataloger_monitor.go @@ -4,8 +4,8 @@ import ( "github.com/wagoodman/go-progress" ) -// PackageCatalogerMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). -type PackageCatalogerMonitor struct { +// CatalogingMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). +type CatalogingMonitor struct { FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers } diff --git a/syft/event/parsers/parsers.go b/syft/event/parsers/parsers.go index 9a16689a3..edac42588 100644 --- a/syft/event/parsers/parsers.go +++ b/syft/event/parsers/parsers.go @@ -38,12 +38,12 @@ func checkEventType(actual, expected partybus.EventType) error { return nil } -func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.PackageCatalogerMonitor, error) { - if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil { +func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.CatalogingMonitor, error) { + if err := checkEventType(e.Type, event.CatalogingStarted); err != nil { return nil, err } - monitor, ok := e.Value.(monitor.PackageCatalogerMonitor) + monitor, ok := e.Value.(monitor.CatalogingMonitor) if !ok { return nil, newPayloadErr(e.Type, "Value", e.Value) } diff --git a/syft/pkg/cataloger/java/test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar.gz b/syft/pkg/cataloger/java/test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar.gz new file mode 100644 index 000000000..6eeeef6fd Binary files /dev/null and b/syft/pkg/cataloger/java/test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tar.gz differ diff --git a/syft/pkg/collection.go b/syft/pkg/collection.go index cb45e09b5..de3a31ace 100644 --- a/syft/pkg/collection.go +++ b/syft/pkg/collection.go @@ -11,7 +11,17 @@ import ( ) // Collection represents a collection of Packages. -type Collection struct { +type Collection interface { + Size() int + Package(id artifact.ID) *Package + PackagesByPath(path string) []Package + Packages(ids []artifact.ID) (result []Package) + Add(p Package) + Enumerate(types ...Type) <-chan Package + Sorted(types ...Type) (pkgs []Package) +} + +type collection struct { byID map[artifact.ID]Package idsByType map[Type][]artifact.ID idsByPath map[string][]artifact.ID // note: this is real path or virtual path @@ -19,8 +29,8 @@ type Collection struct { } // NewCollection returns a new empty Collection -func NewCollection(pkgs ...Package) *Collection { - catalog := Collection{ +func NewCollection(pkgs ...Package) Collection { + catalog := &collection{ byID: make(map[artifact.ID]Package), idsByType: make(map[Type][]artifact.ID), idsByPath: make(map[string][]artifact.ID), @@ -30,16 +40,16 @@ func NewCollection(pkgs ...Package) *Collection { catalog.Add(p) } - return &catalog + return catalog } -// PackageCount returns the total number of packages that have been added. -func (c *Collection) PackageCount() int { +// Size returns the total number of packages that have been added. +func (c *collection) Size() int { return len(c.byID) } // Package returns the package with the given ID. -func (c *Collection) Package(id artifact.ID) *Package { +func (c *collection) Package(id artifact.ID) *Package { v, exists := c.byID[id] if !exists { return nil @@ -54,12 +64,12 @@ func (c *Collection) Package(id artifact.ID) *Package { } // PackagesByPath returns all packages that were discovered from the given path. -func (c *Collection) PackagesByPath(path string) []Package { +func (c *collection) PackagesByPath(path string) []Package { return c.Packages(c.idsByPath[path]) } // Packages returns all packages for the given ID. -func (c *Collection) Packages(ids []artifact.ID) (result []Package) { +func (c *collection) Packages(ids []artifact.ID) (result []Package) { for _, i := range ids { p, exists := c.byID[i] if exists { @@ -70,7 +80,7 @@ func (c *Collection) Packages(ids []artifact.ID) (result []Package) { } // Add a package to the Collection. -func (c *Collection) Add(p Package) { +func (c *collection) Add(p Package) { c.lock.Lock() defer c.lock.Unlock() @@ -102,7 +112,7 @@ func (c *Collection) Add(p Package) { } // Enumerate all packages for the given type(s), enumerating all packages if no type is specified. -func (c *Collection) Enumerate(types ...Type) <-chan Package { +func (c *collection) Enumerate(types ...Type) <-chan Package { channel := make(chan Package) go func() { defer close(channel) @@ -135,9 +145,8 @@ func (c *Collection) Enumerate(types ...Type) <-chan Package { return channel } -// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type -// is specified. -func (c *Collection) Sorted(types ...Type) (pkgs []Package) { +// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type is specified. +func (c *collection) Sorted(types ...Type) (pkgs []Package) { for p := range c.Enumerate(types...) { pkgs = append(pkgs, p) } diff --git a/syft/pkg/relationships.go b/syft/pkg/relationships.go deleted file mode 100644 index be59b2add..000000000 --- a/syft/pkg/relationships.go +++ /dev/null @@ -1,8 +0,0 @@ -package pkg - -import "github.com/anchore/syft/syft/artifact" - -// TODO: as more relationships are added, this function signature will probably accommodate selection -func NewRelationships(catalog *Collection) []artifact.Relationship { - return RelationshipsByFileOwnership(catalog) -} diff --git a/syft/sbom/sbom.go b/syft/sbom/sbom.go index 2ad48ad72..e2501a88c 100644 --- a/syft/sbom/sbom.go +++ b/syft/sbom/sbom.go @@ -16,7 +16,7 @@ type SBOM struct { } type Artifacts struct { - PackageCatalog *pkg.Collection + Packages pkg.Collection FileMetadata map[file.Coordinates]file.Metadata FileDigests map[file.Coordinates][]file.Digest FileClassifications map[file.Coordinates][]file.Classification diff --git a/syft/speculate/identifiers.go b/syft/speculate/identifiers.go new file mode 100644 index 000000000..b9d572033 --- /dev/null +++ b/syft/speculate/identifiers.go @@ -0,0 +1,16 @@ +package speculate + +import ( + "github.com/anchore/syft/syft/pkg" + + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/speculate/cpes" +) + +func Identifiers(p *pkg.Package, release *linux.Release) { + // generate CPEs (note: this is excluded from package ID, so is safe to mutate) + p.CPEs = cpes.Generate(*p) + + // generate PURL (note: this is excluded from package ID, so is safe to mutate) + p.PURL = pkg.URL(*p, release) +} diff --git a/syft/task_collection.go b/syft/task_collection.go new file mode 100644 index 000000000..63350b797 --- /dev/null +++ b/syft/task_collection.go @@ -0,0 +1,233 @@ +package syft + +import ( + "fmt" + "github.com/anchore/syft/syft/cataloger" + "github.com/scylladb/go-set/strset" + "sort" + "strings" +) + +const ( + packageTaskLabel = "package" + fileTaskLabel = "file" + osTaskLabel = "os" + languageTaskLabel = "language" + installedTaskLabel = "installed" + declaredTaskLabel = "declared" +) + +type taskCollection struct { + taskByName map[string]task // name -> generator + namesByLabel map[string][]string // label -> names +} + +func newTaskCollection() *taskCollection { + return &taskCollection{ + taskByName: make(map[string]task), + namesByLabel: make(map[string][]string), + } +} + +func (c *taskCollection) add(t task, labels ...string) error { + var name string + switch v := t.(type) { + case pkgCatalogerTask: + name = string(v.id) + case catalogerTask: + name = string(v.id) + default: + if len(labels) == 0 { + return fmt.Errorf("no ID found for generic task") + } + name = labels[0] + } + + if _, exists := c.taskByName[name]; exists { + return fmt.Errorf("task already exists: %q", name) + } + + c.taskByName[name] = t + + labelSet := strset.New(labels...) + labelSet.Add(name) + for _, n := range labelSet.List() { + c.namesByLabel[n] = append(c.namesByLabel[n], name) + } + return nil +} + +func (c *taskCollection) addAllCatalogers(config CatalogingConfig) error { + for _, d := range []struct { + generator taskGenerator + labels []string + }{ + { + generator: newAPKDBCatalogingTask, + labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "alpine", "apk", "apkdb"}, + }, + { + generator: newDPKGCatalogingTask, + labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "debian", "dpkg", "deb", "dpkgdb"}, + }, + { + generator: newRPMDBCatalogingTask, + labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "redhat", "rhel", "centos", "rpm", "rpmdb"}, + }, + { + generator: newRubyGemSpecCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "ruby", "gemspec", "gem"}, + }, + { + generator: newRubyGemFileLockCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "ruby", "gemfile", "gem", "gemfile.lock"}, + }, + { + generator: newPythonPackageCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "python", "egg", "wheel"}, + }, + { + generator: newPythonRequirementsCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "requirements", "requirements.txt"}, + }, + { + generator: newPythonPoetryCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "poetry", "poetry.lock"}, + }, + { + generator: newPythonSetupCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "setup", "setup.py"}, + }, + { + generator: newPythonPipfileCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "pip", "pipfile"}, + }, + { + generator: newJavascriptPackageJSONCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "javascript", "node", "package.json"}, + }, + { + generator: newJavascriptPackageLockCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "package-lock.json"}, + }, + { + generator: newJavascriptYarnLockCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "yarn", "yarn.lock"}, + }, + { + generator: newJavaCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "java", "maven", "jar", "war", "ear", "jenkins", "hudson", "hpi", "jpi", "par", "sar", "lpkg"}, + }, + { + generator: newGolangModuleCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "go", "golang", "go-module", "go.mod"}, + }, + { + generator: newGolangBinaryCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "go", "golang", "go-module", "binary"}, + }, + { + generator: newRustCargoLockCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "rust", "cargo", "cargo.lock"}, + }, + { + generator: newPHPInstalledCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "php", "composer", "installed.json"}, + }, + { + generator: newPHPComposerLockCatalogingTask, + labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "php", "composer", "composer.lock"}, + }, + { + generator: newFileMetadataCatalogingTask, + labels: []string{fileTaskLabel}, + }, + { + generator: newFileDigestsCatalogingTask, + labels: []string{fileTaskLabel, "digests", "digest", "file-digests"}, + }, + { + generator: newSecretsCatalogingTask, + labels: []string{"secrets"}, + }, + { + generator: newFileClassifierTask, + labels: []string{fileTaskLabel, "classifier"}, + }, + { + generator: newFileContentsCatalogingTask, + labels: []string{fileTaskLabel, "contents", "content", "file-contents"}, + }, + } { + t, err := d.generator(config) + if err != nil { + return err + } + + if t == nil { + continue + } + + if err := c.add(t, d.labels...); err != nil { + return err + } + } + return nil +} + +func (c taskCollection) query(q string) []cataloger.ID { + fields := strings.FieldsFunc(q, func(r rune) bool { + switch r { + case '+', ',', '&': + return true + } + return false + }) + + return c.withLabels(fields...) +} + +func (c taskCollection) all() []cataloger.ID { + var ret []cataloger.ID + for k := range c.taskByName { + ret = append(ret, cataloger.ID(k)) + } + + sort.Sort(cataloger.IDs(ret)) + + return ret +} + +func (c taskCollection) withLabels(q ...string) []cataloger.ID { + req := strset.New() + for i, f := range q { + switch i { + case 0: + req.Add(c.namesByLabel[f]...) + continue + default: + req = strset.Intersection(req, strset.New(c.namesByLabel[f]...)) + } + } + + var ret []cataloger.ID + for _, i := range req.List() { + ret = append(ret, cataloger.ID(i)) + } + + // ensure stable results + sort.Sort(cataloger.IDs(ret)) + + return ret +} + +func (c taskCollection) tasks(ids ...cataloger.ID) (ts []task) { + for _, id := range ids { + t, exists := c.taskByName[string(id)] + if !exists { + continue + } + ts = append(ts, t) + } + return ts +} diff --git a/syft/tasks.go b/syft/tasks.go index e057d8920..a9b782205 100644 --- a/syft/tasks.go +++ b/syft/tasks.go @@ -2,11 +2,24 @@ package syft import ( "fmt" + "github.com/anchore/syft/syft/cataloger" "github.com/anchore/syft/syft/cataloger/files/fileclassifier" "github.com/anchore/syft/syft/cataloger/files/filecontents" "github.com/anchore/syft/syft/cataloger/files/filedigests" "github.com/anchore/syft/syft/cataloger/files/filemetadata" "github.com/anchore/syft/syft/cataloger/files/secrets" + "github.com/anchore/syft/syft/cataloger/packages/apkdb" + "github.com/anchore/syft/syft/cataloger/packages/deb" + "github.com/anchore/syft/syft/cataloger/packages/golang" + "github.com/anchore/syft/syft/cataloger/packages/java" + "github.com/anchore/syft/syft/cataloger/packages/javascript" + "github.com/anchore/syft/syft/cataloger/packages/php" + "github.com/anchore/syft/syft/cataloger/packages/python" + "github.com/anchore/syft/syft/cataloger/packages/rpmdb" + "github.com/anchore/syft/syft/cataloger/packages/ruby" + "github.com/anchore/syft/syft/cataloger/packages/rust" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/speculate" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cataloger/packages" @@ -15,152 +28,370 @@ import ( "github.com/anchore/syft/syft/source" ) -type task func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error) type taskGenerator func(CatalogingConfig) (task, error) -func generatePackagesCatalogingTask(config CatalogingConfig) (task, error) { - if len(config.PackageCatalogers) == 0 { - return nil, nil +type task interface { + Run(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error) +} + +type genericTask struct { + run func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error) +} + +func (t genericTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + return t.run(artifacts, src) +} + +type catalogerTask struct { + id cataloger.ID + genericTask +} + +type pkgCatalogerTask struct { + id cataloger.ID + cataloger pkg.Cataloger + config CatalogingConfig +} + +func (t pkgCatalogerTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(t.config.DefaultScope) + if err != nil { + return nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) } - return func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.Scope) - if err != nil { - return nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) - } + // catalog packages + pkgs, relationships, err := t.cataloger.Catalog(resolver) + if err != nil { + return nil, err + } - // find the distro - artifacts.LinuxDistribution = linux.IdentifyRelease(resolver) + for _, p := range pkgs { + p.FoundBy = string(t.id) + speculate.Identifiers(&p, artifacts.LinuxDistribution) + p.SetID() + artifacts.Packages.Add(p) + } - // catalog packages - catalog, relationships, err := packages.Catalog(resolver, artifacts.LinuxDistribution, config.PackageCatalogers...) - if err != nil { - return nil, err - } - artifacts.PackageCatalog = catalog + return relationships, nil +} - return relationships, nil +func newIdentifyDistroTask(config CatalogingConfig) (task, error) { + return genericTask{ + run: func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, fmt.Errorf("unable to determine resolver while determining linux distro: %w", err) + } + + artifacts.LinuxDistribution = linux.IdentifyRelease(resolver) + + return nil, nil + }, }, nil } -func generateFileMetadataCatalogingTask(config CatalogingConfig) (task, error) { - if !config.CaptureFileMetadata { - return nil, nil - } - - cataloger := filemetadata.NewCataloger() - - return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.Scope) - if err != nil { - return nil, err - } - - result, err := cataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileMetadata = result - return nil, nil +func newAPKDBCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.ApkDBID, + cataloger: apkdb.NewApkdbCataloger(), + config: config, }, nil } -func generateFileDigestsCatalogingTask(config CatalogingConfig) (task, error) { +func newDPKGCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.DpkgID, + cataloger: deb.NewDpkgdbCataloger(), + config: config, + }, nil +} + +func newGolangBinaryCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.GoBinaryID, + cataloger: golang.NewGoModuleBinaryCataloger(), + config: config, + }, nil +} + +func newGolangModuleCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.GoModID, + cataloger: golang.NewGoModFileCataloger(), + config: config, + }, nil +} + +func newJavaCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.JavaArchiveID, + cataloger: java.NewJavaCataloger(java.CatalogerConfig{ + SearchUnindexedArchives: config.PackageSearch.IncludeUnindexedArchives, + SearchIndexedArchives: config.PackageSearch.IncludeIndexedArchives, + }), + config: config, + }, nil +} + +func newJavascriptPackageJSONCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.JavascriptPackageJSONID, + cataloger: javascript.NewJavascriptPackageCataloger(), + config: config, + }, nil +} + +func newJavascriptPackageLockCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.JavascriptPackageLockID, + cataloger: javascript.NewJavascriptPackageLockCataloger(), + config: config, + }, nil +} + +func newJavascriptYarnLockCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.JavaScriptYarnLockID, + cataloger: javascript.NewJavascriptYarnLockCataloger(), + config: config, + }, nil +} + +func newPHPComposerLockCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PHPComposerLockID, + cataloger: php.NewPHPComposerLockCataloger(), + config: config, + }, nil +} + +func newPHPInstalledCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PHPInstalledJSONID, + cataloger: php.NewPHPComposerInstalledCataloger(), + config: config, + }, nil +} + +func newPythonPackageCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PythonPackageID, + cataloger: python.NewPythonPackageCataloger(), + config: config, + }, nil +} + +func newPythonRequirementsCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PythonRequirementsID, + cataloger: python.NewPythonRequirementsCataloger(), + config: config, + }, nil +} + +func newPythonPoetryCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PythonPoetryID, + cataloger: python.NewPythonPoetryCataloger(), + config: config, + }, nil +} + +func newPythonPipfileCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PythonPipFileID, + cataloger: python.NewPythonPipfileCataloger(), + config: config, + }, nil +} + +func newPythonSetupCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.PythonSetupID, + cataloger: python.NewPythonSetupCataloger(), + config: config, + }, nil +} + +func newRPMDBCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.RpmDBID, + cataloger: rpmdb.NewRpmdbCataloger(), + config: config, + }, nil +} + +func newRubyGemFileLockCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.RubyGemfileLockID, + cataloger: ruby.NewGemFileLockCataloger(), + config: config, + }, nil +} + +func newRubyGemSpecCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.RubyGemspecID, + cataloger: ruby.NewGemSpecCataloger(), + config: config, + }, nil +} + +func newRustCargoLockCatalogingTask(config CatalogingConfig) (task, error) { + return pkgCatalogerTask{ + id: cataloger.RustCargoLockID, + cataloger: rust.NewCargoLockCataloger(), + config: config, + }, nil +} + +func newFileMetadataCatalogingTask(config CatalogingConfig) (task, error) { + c := filemetadata.NewCataloger() + + return catalogerTask{ + id: cataloger.FileMetadataID, + genericTask: genericTask{ + run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, err + } + + result, err := c.Catalog(resolver) + if err != nil { + return nil, err + } + results.FileMetadata = result + return nil, nil + }, + }, + }, nil +} + +func newFileDigestsCatalogingTask(config CatalogingConfig) (task, error) { if len(config.DigestHashes) == 0 { return nil, nil } - cataloger, err := filedigests.NewCataloger(config.DigestHashes) + c, err := filedigests.NewCataloger(config.DigestHashes) if err != nil { return nil, err } - return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.Scope) - if err != nil { - return nil, err - } + return catalogerTask{ + id: cataloger.FileDigestsID, + genericTask: genericTask{ + run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, err + } - result, err := cataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileDigests = result - return nil, nil + result, err := c.Catalog(resolver) + if err != nil { + return nil, err + } + results.FileDigests = result + return nil, nil + }, + }, }, nil } -func generateContentsCatalogingTask(config CatalogingConfig) (task, error) { - if len(config.ContentsConfig.Globs) == 0 { +func newFileContentsCatalogingTask(config CatalogingConfig) (task, error) { + if len(config.ContentsSearch.Globs) == 0 { return nil, nil } - cataloger, err := filecontents.NewCataloger(config.ContentsConfig) + c, err := filecontents.NewCataloger(config.ContentsSearch) if err != nil { return nil, err } - return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.Scope) - if err != nil { - return nil, err - } + return catalogerTask{ + id: cataloger.FileContentsID, + genericTask: genericTask{ + run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, err + } - result, err := cataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileContents = result - return nil, nil + result, err := c.Catalog(resolver) + if err != nil { + return nil, err + } + results.FileContents = result + return nil, nil + }, + }, }, nil } -func generateSecretsCatalogingTask(config CatalogingConfig) (task, error) { - if !config.CaptureSecrets { - return nil, nil - } +func newSecretsCatalogingTask(config CatalogingConfig) (task, error) { - cataloger, err := secrets.NewCataloger(config.SecretsConfig) + c, err := secrets.NewCataloger(config.SecretsSearch) if err != nil { return nil, err } - return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.SecretsScope) - if err != nil { - return nil, err - } + return catalogerTask{ + id: cataloger.SecretsID, + genericTask: genericTask{ + run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.SecretsScope) + if err != nil { + return nil, err + } - result, err := cataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.Secrets = result - return nil, nil + result, err := c.Catalog(resolver) + if err != nil { + return nil, err + } + results.Secrets = result + return nil, nil + }, + }, }, nil } -func generateFileClassifierTask(config CatalogingConfig) (task, error) { - if !config.ClassifyFiles { - return nil, nil - } +func newFileClassifierTask(config CatalogingConfig) (task, error) { - cataloger, err := fileclassifier.NewCataloger(config.FileClassifiers) + c, err := fileclassifier.NewCataloger(config.FileClassifiers) if err != nil { return nil, err } - return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { - resolver, err := src.FileResolver(config.Scope) - if err != nil { - return nil, err - } + return catalogerTask{ + id: cataloger.FileClassifierID, + genericTask: genericTask{ + run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, err + } - result, err := cataloger.Catalog(resolver) - if err != nil { - return nil, err - } - results.FileClassifications = result - return nil, nil + result, err := c.Catalog(resolver) + if err != nil { + return nil, err + } + results.FileClassifications = result + return nil, nil + }, + }, + }, nil +} + +func newSynthesizePackageRelationshipsTasks(config CatalogingConfig) (task, error) { + + return genericTask{ + run: func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + resolver, err := src.FileResolver(config.DefaultScope) + if err != nil { + return nil, err + } + + return packages.FindRelationships(artifacts.Packages, resolver), nil + }, }, nil } diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 0c5f212e6..b26d3571d 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -20,7 +20,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { imagetest.GetFixtureImage(b, "docker-archive", fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName) - var pc *pkg.Collection + var pc pkg.Collection for _, c := range packages.InstalledCatalogers(packages.DefaultSearchConfig()) { // in case of future alteration where state is persisted, assume no dependency is safe to reuse userInput := "docker-archive:" + tarPath @@ -48,7 +48,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { } }) - b.Logf("catalog for %q number of packages: %d", c.Name(), pc.PackageCount()) + b.Logf("catalog for %q number of packages: %d", c.Name(), pc.Size()) } } @@ -84,7 +84,7 @@ func TestPkgCoverageImage(t *testing.T) { t.Run(c.name, func(t *testing.T) { pkgCount := 0 - for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) { + for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) { if a.Language.String() != "" { observedLanguages.Add(a.Language.String()) @@ -112,7 +112,7 @@ func TestPkgCoverageImage(t *testing.T) { if pkgCount != len(c.pkgInfo)+c.duplicates { t.Logf("Discovered packages of type %+v", c.pkgType) - for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) { + for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) { t.Log(" ", a) } t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo)) @@ -161,7 +161,7 @@ func TestPkgCoverageDirectory(t *testing.T) { t.Run(test.name, func(t *testing.T) { actualPkgCount := 0 - for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) { + for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { observedLanguages.Add(actualPkg.Language.String()) observedPkgs.Add(string(actualPkg.Type)) @@ -186,7 +186,7 @@ func TestPkgCoverageDirectory(t *testing.T) { } if actualPkgCount != len(test.pkgInfo)+test.duplicates { - for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) { + for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) { t.Log(" ", actualPkg) } t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo)) diff --git a/test/integration/node_packages_test.go b/test/integration/node_packages_test.go index 8505e5c78..b5f32c68f 100644 --- a/test/integration/node_packages_test.go +++ b/test/integration/node_packages_test.go @@ -13,7 +13,7 @@ func TestNpmPackageLockDirectory(t *testing.T) { foundPackages := internal.NewStringSet() - for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { + for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) { for _, actualLocation := range actualPkg.Locations { if strings.Contains(actualLocation.RealPath, "node_modules") { t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation) @@ -34,7 +34,7 @@ func TestYarnPackageLockDirectory(t *testing.T) { foundPackages := internal.NewStringSet() - for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) { + for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) { for _, actualLocation := range actualPkg.Locations { if strings.Contains(actualLocation.RealPath, "node_modules") { t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation) diff --git a/test/integration/regression_apk_scanner_buffer_size_test.go b/test/integration/regression_apk_scanner_buffer_size_test.go index 19bf9f92f..9ffefe88c 100644 --- a/test/integration/regression_apk_scanner_buffer_size_test.go +++ b/test/integration/regression_apk_scanner_buffer_size_test.go @@ -13,7 +13,7 @@ func TestRegression212ApkBufferSize(t *testing.T) { expectedPkgs := 58 actualPkgs := 0 - for range sbom.Artifacts.PackageCatalog.Enumerate(pkg.ApkPkg) { + for range sbom.Artifacts.Packages.Enumerate(pkg.ApkPkg) { actualPkgs += 1 } diff --git a/test/integration/regression_go_bin_scanner_arch_test.go b/test/integration/regression_go_bin_scanner_arch_test.go index f5e21727e..0cb4f9618 100644 --- a/test/integration/regression_go_bin_scanner_arch_test.go +++ b/test/integration/regression_go_bin_scanner_arch_test.go @@ -19,7 +19,7 @@ func TestRegressionGoArchDiscovery(t *testing.T) { var actualELF, actualWIN, actualMACOS int - for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) { + for p := range sbom.Artifacts.Packages.Enumerate(pkg.GoModulePkg) { for _, l := range p.Locations { switch { case strings.Contains(l.RealPath, "elf"): diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 94634ff54..c6e9f4264 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -33,7 +33,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou return sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: pkgCatalog, + Packages: pkgCatalog, LinuxDistribution: release, }, Relationships: relationships, @@ -69,7 +69,7 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) { return sbom.SBOM{ Artifacts: sbom.Artifacts{ - PackageCatalog: pkgCatalog, + Packages: pkgCatalog, LinuxDistribution: release, }, Relationships: relationships, diff --git a/ui/handler.go b/ui/handler.go index bd11733eb..bf9f5c449 100644 --- a/ui/handler.go +++ b/ui/handler.go @@ -15,7 +15,7 @@ import ( "github.com/wagoodman/jotframe/pkg/frame" ) -// Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, PackageCatalogerStarted) +// Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, CatalogingStarted) type Handler struct { } @@ -27,7 +27,7 @@ func NewHandler() *Handler { // RespondsTo indicates if the handler is capable of handling the given event. func (r *Handler) RespondsTo(event partybus.Event) bool { switch event.Type { - case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.PackageCatalogerStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted: + case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.CatalogingStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted: return true default: return false @@ -46,7 +46,7 @@ func (r *Handler) Handle(ctx context.Context, fr *frame.Frame, event partybus.Ev case stereoscopeEvent.FetchImage: return FetchImageHandler(ctx, fr, event, wg) - case syftEvent.PackageCatalogerStarted: + case syftEvent.CatalogingStarted: return PackageCatalogerStartedHandler(ctx, fr, event, wg) case syftEvent.SecretsCatalogerStarted: