add configurable task collection backend

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-06-06 22:02:39 -04:00
parent 078dbedfb6
commit a5dd485672
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
60 changed files with 1073 additions and 644 deletions

View File

@ -193,6 +193,12 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st
go func() {
defer close(errs)
catalogingConfig, err := appConfig.ToCatalogingConfig()
if err != nil {
errs <- err
return
}
src, cleanup, err := source.NewFromRegistry(si, appConfig.Registry.ToOptions(), appConfig.Exclusions)
if cleanup != nil {
defer cleanup()
@ -202,7 +208,7 @@ func attestationExecWorker(si source.Input, format sbom.Format, predicateType st
return
}
s, err := generateSBOM(src)
s, err := generateSBOM(src, catalogingConfig)
if err != nil {
errs <- err
return

View File

@ -94,7 +94,7 @@ func init() {
func setPackageFlags(flags *pflag.FlagSet) {
// Formatting & Input options //////////////////////////////////////////////
flags.StringP(
"scope", "s", syft.DefaultCatalogingConfig().Scope.String(),
"scope", "s", syft.DefaultCatalogingConfig().DefaultScope.String(),
fmt.Sprintf("selection of layers to catalog, options=%v", source.AllScopes))
flags.StringArrayP(
@ -165,7 +165,7 @@ func bindExclusivePackagesConfigOptions(flags *pflag.FlagSet) error {
// note: output is not included since this configuration option is shared between multiple subcommands
if err := viper.BindPFlag("package.cataloger.scope", flags.Lookup("scope")); err != nil {
if err := viper.BindPFlag("scope", flags.Lookup("scope")); err != nil {
return err
}
@ -257,15 +257,9 @@ func isVerbose() (result bool) {
return appConfig.CliOptions.Verbosity > 0 || isPipedInput
}
func generateSBOM(src *source.Source) (*sbom.SBOM, error) {
catalogingConfig, err := appConfig.ToCatalogingConfig()
if err != nil {
return nil, err
}
func generateSBOM(src *source.Source, config *syft.CatalogingConfig) (*sbom.SBOM, error) {
return syft.Catalog(src,
syft.WithConfig(*catalogingConfig),
syft.WithDefaultPackageCatalogers(appConfig.Package.ToConfig()),
syft.WithConfig(*config),
)
}
@ -274,6 +268,12 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error {
go func() {
defer close(errs)
catalogingConfig, err := appConfig.ToCatalogingConfig()
if err != nil {
errs <- err
return
}
src, cleanup, err := source.New(si, appConfig.Registry.ToOptions(), appConfig.Exclusions)
if cleanup != nil {
defer cleanup()
@ -283,7 +283,7 @@ func packagesExecWorker(si source.Input, writer sbom.Writer) <-chan error {
return
}
s, err := generateSBOM(src)
s, err := generateSBOM(src, catalogingConfig)
if err != nil {
errs <- err
return

View File

@ -104,10 +104,17 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error {
go func() {
defer close(errs)
appConfig.Secrets.Cataloger.Enabled = true
appConfig.FileMetadata.Cataloger.Enabled = true
appConfig.FileContents.Cataloger.Enabled = true
appConfig.FileClassification.Cataloger.Enabled = true
// TODO: replace
//appConfig.Secrets.Cataloger.Enabled = true
//appConfig.FileMetadata.Cataloger.Enabled = true
//appConfig.FileContents.Cataloger.Enabled = true
//appConfig.FileClassification.Cataloger.Enabled = true
catalogingConfig, err := appConfig.ToCatalogingConfig()
if err != nil {
errs <- err
return
}
si, err := source.ParseInput(userInput, appConfig.Platform, true)
if err != nil {
@ -124,7 +131,7 @@ func powerUserExecWorker(userInput string, writer sbom.Writer) <-chan error {
defer cleanup()
}
s, err := generateSBOM(src)
s, err := generateSBOM(src, catalogingConfig)
if err != nil {
errs <- err
return

View File

@ -54,7 +54,7 @@ func (m *mockPackageSBOMImportAPI) ImportImagePackages(ctx context.Context, sess
func sbomFixture() sbom.SBOM {
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkg.NewCollection(pkg.Package{
Packages: pkg.NewCollection(pkg.Package{
Name: "name",
Version: "version",
FoundBy: "foundBy",

View File

@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/source"
"path"
"reflect"
"strings"
@ -21,6 +22,7 @@ import (
var ErrApplicationConfigNotFound = fmt.Errorf("application config not found")
// TODO: set all catalogers when this is set
var catalogerEnabledDefault = false
type defaultValueLoader interface {
@ -40,11 +42,11 @@ type Application struct {
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Anchore anchore `yaml:"anchore" json:"anchore" mapstructure:"anchore"` // options for interacting with Anchore Engine/Enterprise
CliOptions CliOnlyOptions `yaml:"-" json:"-"` // all options only available through the CLI (not via env vars or config)
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileMetadata fileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
Secrets secretsCfg `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"`
@ -59,26 +61,33 @@ func (cfg Application) ToCatalogingConfig() (*syft.CatalogingConfig, error) {
return nil, fmt.Errorf("unable to parse config item 'file-metadata.digests': %w", err)
}
scopeOption := source.ParseScope(cfg.Scope)
if scopeOption == source.UnknownScope {
return nil, fmt.Errorf("bad scope value %q", cfg.Scope)
}
secretsConfig, err := cfg.Secrets.ToConfig()
if err != nil {
return nil, err
}
secretsScopeOption := source.ParseScope(cfg.Secrets.Scope)
if secretsScopeOption == source.UnknownScope {
return nil, fmt.Errorf("bad scope value %q", cfg.Secrets.Scope)
}
return &syft.CatalogingConfig{
// note: package catalogers cannot be determined until runtime
ToolName: internal.ApplicationName,
ToolVersion: version.FromBuild().Version,
ToolConfiguration: cfg,
Scope: cfg.Package.Cataloger.ScopeOpt,
DefaultScope: scopeOption,
ProcessTasksInSerial: false,
CaptureFileMetadata: cfg.FileMetadata.Cataloger.Enabled,
DigestHashes: digests,
CaptureSecrets: cfg.Secrets.Cataloger.Enabled,
SecretsConfig: *secretsConfig,
SecretsScope: cfg.Secrets.Cataloger.ScopeOpt,
ClassifyFiles: cfg.FileClassification.Cataloger.Enabled,
SecretsSearch: *secretsConfig,
SecretsScope: secretsScopeOption,
FileClassifiers: fileclassifier.DefaultClassifiers(),
ContentsConfig: cfg.FileContents.ToConfig(),
ContentsSearch: cfg.FileContents.ToConfig(),
}, nil
}

View File

@ -1,29 +0,0 @@
package config
import (
"fmt"
"github.com/spf13/viper"
"github.com/anchore/syft/syft/source"
)
// catalogerOptions holds the settings shared by the individual cataloger configuration sections:
// an on/off switch and the scope the cataloger should operate on.
type catalogerOptions struct {
// Enabled is read from the "enabled" configuration key.
Enabled bool `yaml:"enabled" json:"enabled" mapstructure:"enabled"`
// Scope is the raw scope string read from the "scope" configuration key; it is validated by parseConfigValues.
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
// ScopeOpt is the parsed form of Scope, set by parseConfigValues. It is excluded from YAML and JSON encoding.
ScopeOpt source.Scope `yaml:"-" json:"-"`
}
// loadDefaultValues registers a default of true for the "package.cataloger.enabled" key on the given viper instance.
// Note that the key is hard-coded to the package section and the receiver's fields are not consulted.
func (cfg catalogerOptions) loadDefaultValues(v *viper.Viper) {
v.SetDefault("package.cataloger.enabled", true)
}
// parseConfigValues converts the raw Scope string into a source.Scope and stores it in ScopeOpt.
// An error is returned (and ScopeOpt is left untouched) when the string does not name a known scope.
func (cfg *catalogerOptions) parseConfigValues() error {
	if parsed := source.ParseScope(cfg.Scope); parsed != source.UnknownScope {
		cfg.ScopeOpt = parsed
		return nil
	}
	return fmt.Errorf("bad scope value %q", cfg.Scope)
}

View File

@ -1,19 +0,0 @@
package config
import (
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
// fileClassification is the "file-classification" configuration section; it carries only the shared cataloger options.
type fileClassification struct {
// Cataloger holds the enabled/scope settings found under the "cataloger" key.
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
}
// loadDefaultValues registers the defaults for the file-classification cataloger on the given viper
// instance: enabled falls back to catalogerEnabledDefault and scope falls back to source.SquashedScope.
func (cfg fileClassification) loadDefaultValues(v *viper.Viper) {
	defaults := []struct {
		key   string
		value interface{}
	}{
		{key: "file-classification.cataloger.enabled", value: catalogerEnabledDefault},
		{key: "file-classification.cataloger.scope", value: source.SquashedScope},
	}

	for _, d := range defaults {
		v.SetDefault(d.key, d.value)
	}
}
// parseConfigValues validates this section by delegating to the embedded cataloger options,
// returning whatever error that call reports.
func (cfg *fileClassification) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}

View File

@ -3,29 +3,21 @@ package config
import (
"github.com/anchore/syft/syft/cataloger/files/filecontents"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
type fileContents struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
Globs []string `yaml:"globs" json:"globs" mapstructure:"globs"`
}
func (cfg fileContents) loadDefaultValues(v *viper.Viper) {
v.SetDefault("file-contents.cataloger.enabled", catalogerEnabledDefault)
v.SetDefault("file-contents.cataloger.scope", source.SquashedScope)
v.SetDefault("file-contents.skip-files-above-size", 1*file.MB)
v.SetDefault("file-contents.globs", []string{})
}
func (cfg *fileContents) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}
func (cfg fileContents) ToConfig() filecontents.CatalogerConfig {
return filecontents.CatalogerConfig{
func (cfg fileContents) ToConfig() filecontents.Config {
return filecontents.Config{
Globs: cfg.Globs,
SkipFilesAboveSizeInBytes: cfg.SkipFilesAboveSize,
}

View File

@ -1,21 +1,17 @@
package config
import (
"github.com/anchore/syft/syft/source"
"github.com/spf13/viper"
)
type FileMetadata struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
type fileMetadata struct {
Digests []string `yaml:"digests" json:"digests" mapstructure:"digests"`
}
func (cfg FileMetadata) loadDefaultValues(v *viper.Viper) {
v.SetDefault("file-metadata.cataloger.enabled", catalogerEnabledDefault)
v.SetDefault("file-metadata.cataloger.scope", source.SquashedScope)
func (cfg fileMetadata) loadDefaultValues(v *viper.Viper) {
v.SetDefault("file-metadata.digests", []string{"sha256"})
}
func (cfg *FileMetadata) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
func (cfg *fileMetadata) parseConfigValues() error {
return nil
}

View File

@ -6,22 +6,16 @@ import (
)
type pkg struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
SearchUnindexedArchives bool `yaml:"search-unindexed-archives" json:"search-unindexed-archives" mapstructure:"search-unindexed-archives"`
SearchIndexedArchives bool `yaml:"search-indexed-archives" json:"search-indexed-archives" mapstructure:"search-indexed-archives"`
}
func (cfg pkg) loadDefaultValues(v *viper.Viper) {
cfg.Cataloger.loadDefaultValues(v)
c := packages.DefaultSearchConfig()
v.SetDefault("package.search-unindexed-archives", c.IncludeUnindexedArchives)
v.SetDefault("package.search-indexed-archives", c.IncludeIndexedArchives)
}
func (cfg *pkg) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}
func (cfg pkg) ToConfig() packages.SearchConfig {
return packages.SearchConfig{
IncludeIndexedArchives: cfg.SearchIndexedArchives,

View File

@ -10,32 +10,27 @@ import (
)
type secretsCfg struct {
Cataloger catalogerOptions `yaml:"cataloger" json:"cataloger" mapstructure:"cataloger"`
AdditionalPatterns map[string]string `yaml:"additional-patterns" json:"additional-patterns" mapstructure:"additional-patterns"`
ExcludePatternNames []string `yaml:"exclude-pattern-names" json:"exclude-pattern-names" mapstructure:"exclude-pattern-names"`
RevealValues bool `yaml:"reveal-values" json:"reveal-values" mapstructure:"reveal-values"`
SkipFilesAboveSize int64 `yaml:"skip-files-above-size" json:"skip-files-above-size" mapstructure:"skip-files-above-size"`
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
}
func (cfg secretsCfg) loadDefaultValues(v *viper.Viper) {
v.SetDefault("secrets.cataloger.enabled", catalogerEnabledDefault)
v.SetDefault("secrets.cataloger.scope", source.AllLayersScope)
v.SetDefault("secrets.scope", source.AllLayersScope)
v.SetDefault("secrets.reveal-values", false)
v.SetDefault("secrets.skip-files-above-size", 1*file.MB)
v.SetDefault("secrets.additional-patterns", map[string]string{})
v.SetDefault("secrets.exclude-pattern-names", []string{})
}
func (cfg *secretsCfg) parseConfigValues() error {
return cfg.Cataloger.parseConfigValues()
}
func (cfg secretsCfg) ToConfig() (*secrets.CatalogerConfig, error) {
func (cfg secretsCfg) ToConfig() (*secrets.Config, error) {
patterns, err := file.GenerateSearchPatterns(secrets.DefaultSecretsPatterns, cfg.AdditionalPatterns, cfg.ExcludePatternNames)
if err != nil {
return nil, fmt.Errorf("unable to process secrets config patterns: %w", err)
}
return &secrets.CatalogerConfig{
return &secrets.Config{
Patterns: patterns,
RevealValues: cfg.RevealValues,
MaxFileSize: cfg.SkipFilesAboveSize,

View File

@ -49,7 +49,7 @@ func toSyftModel(bom *cyclonedx.BOM) (*sbom.SBOM, error) {
}
s := &sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkg.NewCollection(),
Packages: pkg.NewCollection(),
LinuxDistribution: linuxReleaseFromComponents(*bom.Components),
},
Source: meta,
@ -86,7 +86,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
idMap[component.BOMRef] = p
// TODO there must be a better way than needing to call this manually:
p.SetID()
s.Artifacts.PackageCatalog.Add(*p)
s.Artifacts.Packages.Add(*p)
}
if component.Components != nil {

View File

@ -212,7 +212,7 @@ func Test_decode(t *testing.T) {
assert.Equal(t, e.ver, sbom.Artifacts.LinuxDistribution.VersionID)
}
if e.pkg != "" {
for p := range sbom.Artifacts.PackageCatalog.Enumerate() {
for p := range sbom.Artifacts.Packages.Enumerate() {
if e.pkg != p.Name {
continue
}
@ -240,7 +240,7 @@ func Test_decode(t *testing.T) {
if e.relation != "" {
foundRelation := false
for _, r := range sbom.Relationships {
p := sbom.Artifacts.PackageCatalog.Package(r.To.ID())
p := sbom.Artifacts.Packages.Package(r.To.ID())
if e.relation == p.Name {
foundRelation = true
break

View File

@ -25,7 +25,7 @@ func ToFormatModel(s sbom.SBOM) *cyclonedx.BOM {
cdxBOM.SerialNumber = uuid.New().URN()
cdxBOM.Metadata = toBomDescriptor(internal.ApplicationName, versionInfo.Version, s.Source)
packages := s.Artifacts.PackageCatalog.Sorted()
packages := s.Artifacts.Packages.Sorted()
components := make([]cyclonedx.Component, len(packages))
for i, p := range packages {
components[i] = encodeComponent(p)

View File

@ -21,7 +21,7 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) {
s := &sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkg.NewCollection(),
Packages: pkg.NewCollection(),
FileMetadata: map[file.Coordinates]file.Metadata{},
FileDigests: map[file.Coordinates][]file.Digest{},
LinuxDistribution: findLinuxReleaseByPURL(doc),
@ -74,7 +74,7 @@ func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *sp
for _, p := range doc.Packages {
syftPkg := toSyftPackage(p)
spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg
s.Artifacts.PackageCatalog.Add(*syftPkg)
s.Artifacts.Packages.Add(*syftPkg)
}
}

View File

@ -94,7 +94,7 @@ func TestToSyftModel(t *testing.T) {
assert.NotNil(t, sbom)
pkgs := sbom.Artifacts.PackageCatalog.Sorted()
pkgs := sbom.Artifacts.Packages.Sorted()
assert.Len(t, pkgs, 2)

View File

@ -124,7 +124,7 @@ func ImageInput(t testing.TB, testImage string, options ...ImageOption) sbom.SBO
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Packages: catalog,
LinuxDistribution: &linux.Release{
PrettyName: "debian",
Name: "debian",
@ -152,7 +152,7 @@ func carriageRedactor(s []byte) []byte {
return []byte(msg)
}
func populateImageCatalog(catalog *pkg.Collection, img *image.Image) {
func populateImageCatalog(catalog pkg.Collection, img *image.Image) {
_, ref1, _ := img.SquashedTree().File("/somefile-1.txt", filetree.FollowBasenameLinks)
_, ref2, _ := img.SquashedTree().File("/somefile-2.txt", filetree.FollowBasenameLinks)
@ -205,7 +205,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM {
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Packages: catalog,
LinuxDistribution: &linux.Release{
PrettyName: "debian",
Name: "debian",
@ -228,7 +228,7 @@ func DirectoryInput(t testing.TB) sbom.SBOM {
}
}
func newDirectoryCatalog() *pkg.Collection {
func newDirectoryCatalog() pkg.Collection {
catalog := pkg.NewCollection()
// populate catalog with test data

View File

@ -57,7 +57,7 @@ func Test_decodeJSON(t *testing.T) {
split = strings.SplitN(pkg, ":", 2)
name = split[0]
version = split[1]
for p := range bom.Artifacts.PackageCatalog.Enumerate() {
for p := range bom.Artifacts.Packages.Enumerate() {
if p.Name == name {
assert.Equal(t, version, p.Version)
continue pkgs

View File

@ -57,7 +57,7 @@ func Test_decodeXML(t *testing.T) {
split = strings.SplitN(pkg, ":", 2)
name = split[0]
version = split[1]
for p := range bom.Artifacts.PackageCatalog.Enumerate() {
for p := range bom.Artifacts.Packages.Enumerate() {
if p.Name == name {
assert.Equal(t, version, p.Version)
continue pkgs

View File

@ -71,11 +71,11 @@ func TestSPDXJSONDecoder(t *testing.T) {
}
if test.packages != nil {
assert.Equal(t, sbom.Artifacts.PackageCatalog.PackageCount(), len(test.packages))
assert.Equal(t, sbom.Artifacts.Packages.Size(), len(test.packages))
packages:
for _, pkgName := range test.packages {
for _, p := range sbom.Artifacts.PackageCatalog.Sorted() {
for _, p := range sbom.Artifacts.Packages.Sorted() {
if p.Name == pkgName {
continue packages
}

View File

@ -42,13 +42,13 @@ func toFormatModel(s sbom.SBOM) (*model.Document, error) {
},
DataLicense: "CC0-1.0",
DocumentNamespace: namespace,
Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships),
Packages: toPackages(s.Artifacts.Packages, s.Relationships),
Files: toFiles(s),
Relationships: toRelationships(s.Relationships),
}, nil
}
func toPackages(catalog *pkg.Collection, relationships []artifact.Relationship) []model.Package {
func toPackages(catalog pkg.Collection, relationships []artifact.Relationship) []model.Package {
packages := make([]model.Package, 0)
for _, p := range catalog.Sorted() {

View File

@ -85,13 +85,13 @@ func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) {
// Cardinality: optional, one
DocumentComment: "",
},
Packages: toFormatPackages(s.Artifacts.PackageCatalog),
Packages: toFormatPackages(s.Artifacts.Packages),
}, nil
}
// toFormatPackages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/)
// nolint: funlen
func toFormatPackages(catalog *pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 {
func toFormatPackages(catalog pkg.Collection) map[spdx.ElementID]*spdx.Package2_2 {
results := make(map[spdx.ElementID]*spdx.Package2_2)
for _, p := range catalog.Sorted() {

View File

@ -28,8 +28,8 @@ func TestEncodeDecodeCycle(t *testing.T) {
t.Errorf("metadata difference: %+v", d)
}
actualPackages := actualSBOM.Artifacts.PackageCatalog.Sorted()
for idx, p := range originalSBOM.Artifacts.PackageCatalog.Sorted() {
actualPackages := actualSBOM.Artifacts.Packages.Sorted()
for idx, p := range originalSBOM.Artifacts.Packages.Sorted() {
if !assert.Equal(t, p.Name, actualPackages[idx].Name) {
t.Errorf("different package at idx=%d: %s vs %s", idx, p.Name, actualPackages[idx].Name)
continue

View File

@ -95,7 +95,7 @@ func TestEncodeFullJSONDocument(t *testing.T) {
s := sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Packages: catalog,
FileMetadata: map[file.Coordinates]file.Metadata{
file.NewLocation("/a/place").Coordinates: {
Mode: 0775,

View File

@ -31,7 +31,7 @@ func ToFormatModel(s sbom.SBOM) model.Document {
}
return model.Document{
Artifacts: toPackageModels(s.Artifacts.PackageCatalog),
Artifacts: toPackageModels(s.Artifacts.Packages),
ArtifactRelationships: toRelationshipModel(s.Relationships),
Files: toFile(s),
Secrets: toSecrets(s.Artifacts.Secrets),
@ -153,7 +153,7 @@ func toFileMetadataEntry(coordinates file.Coordinates, metadata *file.Metadata)
}
}
func toPackageModels(catalog *pkg.Collection) []model.Package {
func toPackageModels(catalog pkg.Collection) []model.Package {
artifacts := make([]model.Package, 0)
if catalog == nil {
return artifacts

View File

@ -18,7 +18,7 @@ func toSyftModel(doc model.Document) (*sbom.SBOM, error) {
return &sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: catalog,
Packages: catalog,
LinuxDistribution: toSyftLinuxRelease(doc.Distro),
},
Source: *toSyftSourceData(doc.Source),
@ -48,7 +48,7 @@ func toSyftLinuxRelease(d model.LinuxRelease) *linux.Release {
}
}
func toSyftRelationships(doc *model.Document, catalog *pkg.Collection, relationships []model.Relationship) []artifact.Relationship {
func toSyftRelationships(doc *model.Document, catalog pkg.Collection, relationships []model.Relationship) []artifact.Relationship {
idMap := make(map[string]interface{})
for _, p := range catalog.Sorted() {
@ -130,7 +130,7 @@ func toSyftSourceData(s model.Source) *source.Metadata {
return nil
}
func toSyftCatalog(pkgs []model.Package) *pkg.Collection {
func toSyftCatalog(pkgs []model.Package) pkg.Collection {
catalog := pkg.NewCollection()
for _, p := range pkgs {
catalog.Add(toSyftPackage(p))

View File

@ -15,7 +15,7 @@ func encoder(output io.Writer, s sbom.SBOM) error {
var rows [][]string
columns := []string{"Name", "Version", "Type"}
for _, p := range s.Artifacts.PackageCatalog.Sorted() {
for _, p := range s.Artifacts.Packages.Sorted() {
row := []string{
p.Name,
p.Version,

View File

@ -35,7 +35,7 @@ func encoder(output io.Writer, s sbom.SBOM) error {
// populate artifacts...
rows := 0
for _, p := range s.Artifacts.PackageCatalog.Sorted() {
for _, p := range s.Artifacts.Packages.Sorted() {
fmt.Fprintf(w, "[%s]\n", p.Name)
fmt.Fprintln(w, " Version:\t", p.Version)
fmt.Fprintln(w, " Type:\t", string(p.Type))

View File

@ -2,13 +2,28 @@ package syft
import (
"fmt"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
"github.com/hashicorp/go-multierror"
"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"
)
// monitorableCollection wraps a pkg.Collection so that every package added through it is also
// counted on a progress monitor.
type monitorableCollection struct {
pkg.Collection
// monitor has its N counter incremented once per Add call.
monitor *progress.Manual
}
// Add bumps the monitor's counter by one and then forwards the package to the embedded Collection.
// NOTE(review): the increment is a plain (non-atomic) write; confirm Add is never reached from tasks
// that run concurrently, or guard the counter.
func (m *monitorableCollection) Add(p pkg.Package) {
m.monitor.N++
m.Collection.Add(p)
}
func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error) {
var config = DefaultCatalogingConfig()
for _, optFn := range options {
@ -17,28 +32,60 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error
}
}
var tasks []task
generators := []taskGenerator{
generatePackagesCatalogingTask,
generateFileMetadataCatalogingTask,
generateFileDigestsCatalogingTask,
generateSecretsCatalogingTask,
generateFileClassifierTask,
generateContentsCatalogingTask,
if config.availableTasks == nil {
config.availableTasks = newTaskCollection()
}
for _, generator := range generators {
t, err := generator(config)
tc := config.availableTasks
if err := tc.addAllCatalogers(config); err != nil {
return nil, fmt.Errorf("unable to register catalogers: %w", err)
}
var catalogingTasks []task
if len(config.EnabledCatalogers) == 0 {
switch src.Metadata.Scheme {
case source.ImageType:
catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel, installedTaskLabel)...)
case source.FileType:
catalogingTasks = tc.tasks(tc.all()...)
case source.DirectoryType:
// TODO: it looks like gemspec was left out on main, is this intentional? if so it's not accounted for here...
catalogingTasks = tc.tasks(tc.withLabels(packageTaskLabel)...)
}
}
if len(catalogingTasks) == 0 {
return nil, fmt.Errorf("no cataloging tasks configured to run")
}
// special case: we need to identify the linux distro for downstream processing
identifyLinuxDistroTask, err := newIdentifyDistroTask(config)
if err != nil {
return nil, fmt.Errorf("unable to create cataloging task: %w", err)
return nil, fmt.Errorf("unable to create linux distro identification task: %+v", err)
}
if t != nil {
tasks = append(tasks, t)
synthesizePackageRelationshipsTask, err := newSynthesizePackageRelationshipsTasks(config)
if err != nil {
return nil, fmt.Errorf("unable to create task to synthesize package relationships: %+v", err)
}
taskGroups := [][]task{
{
identifyLinuxDistroTask,
},
catalogingTasks,
{
synthesizePackageRelationshipsTask,
},
}
files, pkgs := newCatalogerMonitor()
defer func() {
files.SetCompleted() // TODO: files monitor is unused... should we remove?
pkgs.SetCompleted()
}()
s := sbom.SBOM{
Source: src.Metadata,
Descriptor: sbom.Descriptor{
@ -46,12 +93,39 @@ func Catalog(src *source.Source, options ...CatalogingOption) (*sbom.SBOM, error
Version: config.ToolVersion,
Configuration: config.ToolConfiguration,
},
Artifacts: sbom.Artifacts{
Packages: &monitorableCollection{
Collection: pkg.NewCollection(),
monitor: pkgs,
},
},
}
return &s, runTasks(&s, src, tasks, config.ProcessTasksInSerial)
for _, tasks := range taskGroups {
if err := runTasks(&s, src, config.ProcessTasksInSerial, tasks...); err != nil {
return &s, err
}
}
return &s, nil
}
func runTasks(s *sbom.SBOM, src *source.Source, tasks []task, serial bool) error {
// newCatalogerMonitor allocates two manual progress trackers (files processed, packages discovered),
// announces them on the bus inside a CatalogingStarted event, and hands both back to the caller in
// that order so the caller can drive them.
func newCatalogerMonitor() (*progress.Manual, *progress.Manual) {
	var (
		processed  = new(progress.Manual)
		discovered = new(progress.Manual)
	)

	started := partybus.Event{
		Type: event.CatalogingStarted,
		Value: monitor.CatalogingMonitor{
			FilesProcessed:     progress.Monitorable(processed),
			PackagesDiscovered: progress.Monitorable(discovered),
		},
	}
	bus.Publish(started)

	return processed, discovered
}
func runTasks(s *sbom.SBOM, src *source.Source, serial bool, tasks ...task) error {
var relationships []<-chan artifact.Relationship
var errs = make(chan error)
for _, t := range tasks {
@ -92,7 +166,7 @@ func mergeErrors(errs <-chan error) (allErrs error) {
func runTask(t task, a *sbom.Artifacts, src *source.Source, r chan<- artifact.Relationship, errs chan<- error) {
defer close(r)
relationships, err := t(a, src)
relationships, err := t.Run(a, src)
if err != nil {
errs <- err
return

View File

@ -11,23 +11,23 @@ import (
"github.com/anchore/syft/syft/file"
)
type CatalogerConfig struct {
type Config struct {
Globs []string
SkipFilesAboveSizeInBytes int64
}
type Cataloger struct {
config CatalogerConfig
config Config
}
func DefaultCatalogerConfig() CatalogerConfig {
return CatalogerConfig{
func DefaultConfig() Config {
return Config{
Globs: nil,
SkipFilesAboveSizeInBytes: 1 * file.MB,
}
}
func NewCataloger(config CatalogerConfig) (*Cataloger, error) {
func NewCataloger(config Config) (*Cataloger, error) {
return &Cataloger{
config: config,
}, nil

View File

@ -66,7 +66,7 @@ func TestContentsCataloger(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
c, err := NewCataloger(CatalogerConfig{
c, err := NewCataloger(Config{
Globs: test.globs,
SkipFilesAboveSizeInBytes: test.maxSize,
})

View File

@ -27,30 +27,30 @@ var DefaultSecretsPatterns = map[string]string{
"generic-api-key": `(?i)api(-|_)?key["'=:\s]*?(?P<value>[A-Z0-9]{20,60})["']?(\s|$)`,
}
type CatalogerConfig struct {
type Config struct {
Patterns map[string]*regexp.Regexp
RevealValues bool
MaxFileSize int64
}
type Cataloger struct {
config CatalogerConfig
config Config
}
func DefaultCatalogerConfig() CatalogerConfig {
func DefaultConfig() Config {
patterns, err := file.GenerateSearchPatterns(DefaultSecretsPatterns, nil, nil)
if err != nil {
patterns = make(map[string]*regexp.Regexp)
log.Errorf("unable to create default secrets config: %w", err)
}
return CatalogerConfig{
return Config{
Patterns: patterns,
RevealValues: false,
MaxFileSize: 1 * file.MB,
}
}
func NewCataloger(config CatalogerConfig) (*Cataloger, error) {
func NewCataloger(config Config) (*Cataloger, error) {
return &Cataloger{
config: config,
}, nil

View File

@ -174,7 +174,7 @@ func TestSecretsCataloger(t *testing.T) {
regexObjs[name] = obj
}
c, err := NewCataloger(CatalogerConfig{
c, err := NewCataloger(Config{
Patterns: regexObjs,
RevealValues: test.reveal,
MaxFileSize: test.maxSize,
@ -420,7 +420,7 @@ j4f668YfhUbKdRF6S6734856
for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) {
c, err := NewCataloger(CatalogerConfig{
c, err := NewCataloger(Config{
Patterns: regexObjs,
RevealValues: true,
MaxFileSize: 10 * file.MB,

40
syft/cataloger/id.go Normal file
View File

@ -0,0 +1,40 @@
package cataloger
// Identifiers for the individual catalogers. The string values are grouped by prefix: OS package
// databases ("os-"), language ecosystem catalogers, and file-oriented catalogers.
const (
// OS package database catalogers
ApkDBID ID = "os-apkdb"
DpkgID ID = "os-dpkg"
RpmDBID ID = "os-rpmdb"
// Ruby
RubyGemspecID ID = "ruby-gem-spec"
RubyGemfileLockID ID = "ruby-gem-file-lock"
// Python
PythonPackageID ID = "python-package"
PythonRequirementsID ID = "python-requirements"
PythonPoetryID ID = "python-poetry"
PythonSetupID ID = "python-setup"
PythonPipFileID ID = "python-pipfile"
// JavaScript
JavascriptPackageJSONID ID = "javascript-package-json"
JavascriptPackageLockID ID = "javascript-package-lock"
JavaScriptYarnLockID ID = "javascript-yarn-lock"
// Java
JavaArchiveID ID = "java-archive"
// Go
GoModID ID = "go-mod"
GoBinaryID ID = "go-binary"
// Rust
RustCargoLockID ID = "rust-cargo-lock"
// PHP
PHPInstalledJSONID ID = "php-installed-json"
PHPComposerLockID ID = "php-composer-lock"
// file-oriented catalogers (metadata, digests, secrets, classification, contents)
FileMetadataID ID = "file-metadata"
FileDigestsID ID = "file-digest"
SecretsID ID = "secrets"
FileClassifierID ID = "file-classifier"
FileContentsID ID = "file-content"
)
// ID is the string identifier of a single cataloger.
type ID string
// IDs is a list of cataloger identifiers. Its Len, Swap, and Less methods match the method set of
// sort.Interface, ordering entries by plain string comparison.
type IDs []ID
// Len reports the number of identifiers in the list.
func (c IDs) Len() int { return len(c) }
// Swap exchanges the identifiers at positions i and j.
func (c IDs) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
// Less reports whether the identifier at i sorts before the one at j (string comparison).
func (c IDs) Less(i, j int) bool {
return c[i] < c[j]
}

View File

@ -1,126 +0,0 @@
package packages
import (
"fmt"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/speculate/cpes"
"github.com/hashicorp/go-multierror"
"github.com/wagoodman/go-partybus"
"github.com/wagoodman/go-progress"
)
// Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages.
// In order to efficiently retrieve contents from an underlying container image the content fetch requests are
// done in bulk. Specifically, all files of interest are collected from each cataloger and accumulated into a single
// request.
//
// Every cataloger is run in turn against the resolver. A cataloger that fails is recorded and skipped; if any
// cataloger failed, the accumulated error is returned and the collection and relationships are nil. For each
// discovered package the CPEs and PURL are filled in and file-ownership relationships are derived before the
// package is added to the collection. Both progress monitors are marked completed on every return path.
func Catalog(resolver file.Resolver, release *linux.Release, catalogers ...pkg.Cataloger) (*pkg.Collection, []artifact.Relationship, error) {
	catalog := pkg.NewCollection()
	var allRelationships []artifact.Relationship

	filesProcessed, packagesDiscovered := newPackageCatalogerMonitor()
	// complete the monitors no matter how we return; previously the error path left them unfinished
	defer func() {
		filesProcessed.SetCompleted()
		packagesDiscovered.SetCompleted()
	}()

	// perform analysis, accumulating errors for each failed analysis
	var errs error
	for _, c := range catalogers {
		// find packages from the underlying raw data
		log.Debugf("cataloging with %q", c.Name())
		packages, relationships, err := c.Catalog(resolver)
		if err != nil {
			errs = multierror.Append(errs, err)
			continue
		}

		catalogedPackages := len(packages)
		log.Debugf("discovered %d packages", catalogedPackages)
		packagesDiscovered.N += int64(catalogedPackages)

		for _, p := range packages {
			// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
			p.CPEs = cpes.Generate(p)

			// generate PURL (note: this is excluded from package ID, so is safe to mutate)
			p.PURL = pkg.URL(p, release)

			// create file-to-package relationships for files owned by the package
			owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
			if err != nil {
				// best effort: a failure here is logged and the package is still cataloged.
				// note: %w is only understood by fmt.Errorf, so the error is formatted with %+v for the log line.
				log.Warnf("unable to create any package-file relationships for package name=%q: %+v", p.Name, err)
			} else {
				allRelationships = append(allRelationships, owningRelationships...)
			}

			// add to catalog
			catalog.Add(p)
		}

		allRelationships = append(allRelationships, relationships...)
	}

	allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...)

	if errs != nil {
		return nil, nil, errs
	}

	return catalog, allRelationships, nil
}
// packageFileOwnershipRelationships builds a "contains" relationship from the given package to the coordinates of
// every location the resolver reports for each path the package metadata claims to own. A package whose metadata
// does not implement pkg.FileOwner yields no relationships and no error. The first path lookup failure aborts the
// whole operation and is returned wrapped.
func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
	owner, isOwner := p.Metadata.(pkg.FileOwner)
	if !isOwner {
		return nil, nil
	}

	var owned []artifact.Relationship
	for _, ownedPath := range owner.OwnedFiles() {
		resolved, err := resolver.FilesByPath(ownedPath)
		if err != nil {
			return nil, fmt.Errorf("unable to find path for path=%q: %w", ownedPath, err)
		}

		// a path with no resolved locations contributes nothing. Ideally users would be warned about files missing
		// from a package, however, it is very common for container image authors to delete files that are not
		// needed in order to keep image sizes small, so a warning would be needlessly noisy.
		for _, location := range resolved {
			owned = append(owned, artifact.Relationship{
				From: p,
				To:   location.Coordinates,
				Type: artifact.ContainsRelationship,
			})
		}
	}

	return owned, nil
}
// newPackageCatalogerMonitor creates two manual progress trackers (files processed, packages discovered), publishes
// them on the bus inside a monitor.PackageCatalogerMonitor as a PackageCatalogerStarted event, and returns the
// trackers so the caller can update them.
func newPackageCatalogerMonitor() (*progress.Manual, *progress.Manual) {
	filesProcessed := &progress.Manual{}
	packagesDiscovered := &progress.Manual{}

	started := partybus.Event{
		Type: event.PackageCatalogerStarted,
		Value: monitor.PackageCatalogerMonitor{
			FilesProcessed:     progress.Monitorable(filesProcessed),
			PackagesDiscovered: progress.Monitorable(packagesDiscovered),
		},
	}
	bus.Publish(started)

	return filesProcessed, packagesDiscovered
}

View File

@ -1,82 +0,0 @@
package packages
import (
"github.com/anchore/syft/syft/cataloger/packages/apkdb"
"github.com/anchore/syft/syft/cataloger/packages/deb"
"github.com/anchore/syft/syft/cataloger/packages/golang"
"github.com/anchore/syft/syft/cataloger/packages/java"
"github.com/anchore/syft/syft/cataloger/packages/javascript"
"github.com/anchore/syft/syft/cataloger/packages/php"
"github.com/anchore/syft/syft/cataloger/packages/python"
"github.com/anchore/syft/syft/cataloger/packages/rpmdb"
"github.com/anchore/syft/syft/cataloger/packages/ruby"
"github.com/anchore/syft/syft/cataloger/packages/rust"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// TODO: add tag-based API to select appropriate package catalogers for different scenarios

// AllCatalogers returns all implemented package catalogers. The java cataloger is configured from the given search
// configuration; all other catalogers take no configuration.
func AllCatalogers(cfg SearchConfig) []pkg.Cataloger {
	return []pkg.Cataloger{
		ruby.NewGemFileLockCataloger(),
		ruby.NewGemSpecCataloger(),
		python.NewPythonIndexCataloger(),
		python.NewPythonPackageCataloger(),
		javascript.NewJavascriptLockCataloger(),
		javascript.NewJavascriptPackageCataloger(),
		deb.NewDpkgdbCataloger(),
		rpmdb.NewRpmdbCataloger(),
		java.NewJavaCataloger(cfg.Java()),
		apkdb.NewApkdbCataloger(),
		golang.NewGoModuleBinaryCataloger(),
		golang.NewGoModFileCataloger(),
		rust.NewCargoLockCataloger(),
		// the PHP catalogers are part of the installed/index selections and so must also be part of "all"
		php.NewPHPComposerInstalledCataloger(),
		php.NewPHPComposerLockCataloger(),
	}
}
// InstalledCatalogers returns the package catalogers that are suited to detecting packages that have been installed
// (as opposed to packages merely referenced from index or lock files).
func InstalledCatalogers(cfg SearchConfig) []pkg.Cataloger {
	javaConfig := cfg.Java()

	installed := []pkg.Cataloger{
		ruby.NewGemSpecCataloger(),
		python.NewPythonPackageCataloger(),
		php.NewPHPComposerInstalledCataloger(),
		javascript.NewJavascriptPackageCataloger(),
		deb.NewDpkgdbCataloger(),
		rpmdb.NewRpmdbCataloger(),
		java.NewJavaCataloger(javaConfig),
		apkdb.NewApkdbCataloger(),
		golang.NewGoModuleBinaryCataloger(),
	}
	return installed
}
// IndexCatalogers returns the package catalogers that are suited to detecting packages from index files, together
// with a selection of installation-oriented catalogers (marked "for install" below).
func IndexCatalogers(cfg SearchConfig) []pkg.Cataloger {
	javaConfig := cfg.Java()

	indexed := []pkg.Cataloger{
		ruby.NewGemFileLockCataloger(),
		python.NewPythonIndexCataloger(),
		python.NewPythonPackageCataloger(), // for install
		php.NewPHPComposerLockCataloger(),
		javascript.NewJavascriptLockCataloger(),
		deb.NewDpkgdbCataloger(),            // for install
		rpmdb.NewRpmdbCataloger(),           // for install
		java.NewJavaCataloger(javaConfig),   // for install
		apkdb.NewApkdbCataloger(),           // for install
		golang.NewGoModuleBinaryCataloger(), // for install
		golang.NewGoModFileCataloger(),
		rust.NewCargoLockCataloger(),
	}
	return indexed
}
// CatalogersBySourceScheme selects the set of package catalogers for the given source type: installed catalogers
// for images, all catalogers for single files, and index catalogers for directories. Any other source type yields
// nil.
func CatalogersBySourceScheme(scheme source.Type, cfg SearchConfig) []pkg.Cataloger {
	if scheme == source.ImageType {
		return InstalledCatalogers(cfg)
	}
	if scheme == source.FileType {
		return AllCatalogers(cfg)
	}
	if scheme == source.DirectoryType {
		return IndexCatalogers(cfg)
	}
	return nil
}

View File

@ -0,0 +1,24 @@
package packages
import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
// FindRelationships returns the relationships that can be derived from the given package collection: first the
// package-to-file relationships for the files each package claims to own (resolved against the given resolver),
// then the package-to-package relationships found from overlapping file ownership. A package whose owned files
// cannot be resolved is logged and skipped rather than failing the whole operation.
func FindRelationships(catalog pkg.Collection, resolver file.Resolver) []artifact.Relationship {
	var allRelationships []artifact.Relationship
	for p := range catalog.Enumerate() {
		relationships, err := createFileOwnershipRelationships(p, resolver)
		if err != nil {
			// note: %w is only understood by fmt.Errorf; printf-style formatters render it as a bad-verb
			// placeholder, so the error is formatted with %v here.
			log.Warnf("unable to create any package-file relationships for package name=%q: %v", p.Name, err)
			continue
		}
		allRelationships = append(allRelationships, relationships...)
	}

	allRelationships = append(allRelationships, findOwnershipByFileOverlapRelationship(catalog)...)

	return allRelationships
}

View File

@ -13,15 +13,22 @@ func NewJavascriptPackageCataloger() *generic.Cataloger {
"**/package.json": parsePackageJSON,
}
return generic.NewCataloger(nil, globParsers, "javascript-package-cataloger")
return generic.NewCataloger(nil, globParsers, "javascript-package-json-cataloger")
}
// NewJavascriptLockCataloger returns a new Javascript cataloger object based on package lock files.
func NewJavascriptLockCataloger() *generic.Cataloger {
func NewJavascriptPackageLockCataloger() *generic.Cataloger {
globParsers := map[string]generic.Parser{
"**/package-lock.json": parsePackageLock,
}
return generic.NewCataloger(nil, globParsers, "javascript-package-lock-cataloger")
}
func NewJavascriptYarnLockCataloger() *generic.Cataloger {
globParsers := map[string]generic.Parser{
"**/yarn.lock": parseYarnLock,
}
return generic.NewCataloger(nil, globParsers, "javascript-lock-cataloger")
return generic.NewCataloger(nil, globParsers, "javascript-yarn-lock-cataloger")
}

View File

@ -8,13 +8,34 @@ import (
)
// NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
func NewPythonIndexCataloger() *generic.Cataloger {
// NewPythonRequirementsCataloger returns a new generic cataloger named "python-requirements-cataloger" that
// registers parseRequirementsTxt for the glob "**/*requirements*.txt".
func NewPythonRequirementsCataloger() *generic.Cataloger {
	return generic.NewCataloger(
		nil,
		map[string]generic.Parser{"**/*requirements*.txt": parseRequirementsTxt},
		"python-requirements-cataloger",
	)
}
// NewPythonPoetryCataloger returns a new generic cataloger named "python-poetry-cataloger" that registers
// parsePoetryLock for the glob "**/poetry.lock".
func NewPythonPoetryCataloger() *generic.Cataloger {
	return generic.NewCataloger(
		nil,
		map[string]generic.Parser{"**/poetry.lock": parsePoetryLock},
		"python-poetry-cataloger",
	)
}
// NewPythonPipfileCataloger returns a new generic cataloger named "python-pipfile-cataloger" that registers
// parsePipfileLock for the glob "**/Pipfile.lock".
func NewPythonPipfileCataloger() *generic.Cataloger {
	return generic.NewCataloger(
		nil,
		map[string]generic.Parser{"**/Pipfile.lock": parsePipfileLock},
		"python-pipfile-cataloger",
	)
}
func NewPythonSetupCataloger() *generic.Cataloger {
globParsers := map[string]generic.Parser{
"**/setup.py": parseSetup,
}
return generic.NewCataloger(nil, globParsers, "python-index-cataloger")
return generic.NewCataloger(nil, globParsers, "python-setup-cataloger")
}

View File

@ -1,8 +1,11 @@
package pkg
package packages
import (
"fmt"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
)
@ -10,9 +13,9 @@ import (
var globsForbiddenFromBeingOwned = []string{
// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
// RPM DB, so if not ignored that package would own all other packages on the system).
ApkDBGlob,
DpkgDBGlob,
RpmDBGlob,
pkg.ApkDBGlob,
pkg.DpkgDBGlob,
pkg.RpmDBGlob,
// DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership.
"/usr/share/doc/**/copyright",
}
@ -21,17 +24,50 @@ type ownershipByFilesMetadata struct {
Files []string `json:"files"`
}
// RelationshipsByFileOwnership creates a package-to-package relationship based on discovering which packages have
// createFileOwnershipRelationships produces one "contains" relationship per resolved location for every path that
// the package's metadata claims to own. Metadata that does not implement pkg.FileOwner results in no relationships
// and no error; a failed path lookup is returned wrapped and discards any relationships gathered so far.
func createFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
	claimant, claimsFiles := p.Metadata.(pkg.FileOwner)
	if !claimsFiles {
		return nil, nil
	}

	var contains []artifact.Relationship
	for _, claimedPath := range claimant.OwnedFiles() {
		found, err := resolver.FilesByPath(claimedPath)
		if err != nil {
			return nil, fmt.Errorf("unable to find path for path=%q: %w", claimedPath, err)
		}

		// nothing is emitted (and nothing is logged) for a claimed path that resolves to no locations: it is very
		// common for container image authors to delete files that are not needed in order to keep image sizes
		// small, so warning about missing files would be needlessly noisy (even for popular base images).
		for _, location := range found {
			contains = append(contains, artifact.Relationship{
				From: p,
				To:   location.Coordinates,
				Type: artifact.ContainsRelationship,
			})
		}
	}

	return contains, nil
}
// findOwnershipByFileOverlapRelationship creates a package-to-package relationship based on discovering which packages have
// evidence locations that overlap with ownership claim from another package's package manager metadata.
func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship {
var relationships = findOwnershipByFilesRelationships(catalog)
func findOwnershipByFileOverlapRelationship(catalog pkg.Collection) []artifact.Relationship {
var relationships = findFilesWithDisputedOwnership(catalog)
var edges []artifact.Relationship
for parentID, children := range relationships {
for childID, files := range children {
edges = append(edges, artifact.Relationship{
From: catalog.byID[parentID],
To: catalog.byID[childID],
From: catalog.Package(parentID),
To: catalog.Package(childID),
Type: artifact.OwnershipByFileOverlapRelationship,
Data: ownershipByFilesMetadata{
Files: files.List(),
@ -43,9 +79,9 @@ func RelationshipsByFileOwnership(catalog *Collection) []artifact.Relationship {
return edges
}
// findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
// findFilesWithDisputedOwnership find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
// a package is found to be owned by another (from the owner's .Metadata.Files[]).
func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
func findFilesWithDisputedOwnership(catalog pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
if catalog == nil {
@ -59,7 +95,7 @@ func findOwnershipByFilesRelationships(catalog *Collection) map[artifact.ID]map[
}
// check to see if this is a file owner
pkgFileOwner, ok := candidateOwnerPkg.Metadata.(FileOwner)
pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner)
if !ok {
continue
}

View File

@ -1,7 +1,8 @@
package pkg
package packages
import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"testing"
"github.com/anchore/syft/syft/artifact"
@ -12,20 +13,20 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
tests := []struct {
name string
setup func(t testing.TB) ([]Package, []artifact.Relationship)
setup func(t testing.TB) ([]pkg.Package, []artifact.Relationship)
}{
{
name: "owns-by-real-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/another/path"),
file.NewVirtualLocation("/b/path", "/bee/path"),
},
Type: RpmPkg,
MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{
Files: []RpmdbFileRecord{
Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType,
Metadata: pkg.RpmdbMetadata{
Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"},
{Path: "/owning/path/2"},
{Path: "/d/path"},
@ -34,12 +35,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}
parent.SetID()
child := Package{
child := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"),
file.NewVirtualLocation("/d/path", "/another/path"),
},
Type: NpmPkg,
Type: pkg.NpmPkg,
}
child.SetID()
@ -54,21 +55,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
},
}
return []Package{parent, child}, []artifact.Relationship{relationship}
return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "owns-by-virtual-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/some/other/path"),
file.NewVirtualLocation("/b/path", "/bee/path"),
},
Type: RpmPkg,
MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{
Files: []RpmdbFileRecord{
Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType,
Metadata: pkg.RpmdbMetadata{
Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"},
{Path: "/owning/path/2"},
{Path: "/another/path"},
@ -77,12 +78,12 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
}
parent.SetID()
child := Package{
child := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"),
file.NewLocation("/d/path"),
},
Type: NpmPkg,
Type: pkg.NpmPkg,
}
child.SetID()
@ -96,21 +97,21 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
},
},
}
return []Package{parent, child}, []artifact.Relationship{relationship}
return []pkg.Package{parent, child}, []artifact.Relationship{relationship}
},
},
{
name: "ignore-empty-path",
setup: func(t testing.TB) ([]Package, []artifact.Relationship) {
parent := Package{
setup: func(t testing.TB) ([]pkg.Package, []artifact.Relationship) {
parent := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/a/path", "/some/other/path"),
file.NewVirtualLocation("/b/path", "/bee/path"),
},
Type: RpmPkg,
MetadataType: RpmdbMetadataType,
Metadata: RpmdbMetadata{
Files: []RpmdbFileRecord{
Type: pkg.RpmPkg,
MetadataType: pkg.RpmdbMetadataType,
Metadata: pkg.RpmdbMetadata{
Files: []pkg.RpmdbFileRecord{
{Path: "/owning/path/1"},
{Path: "/owning/path/2"},
{Path: ""},
@ -120,17 +121,17 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
parent.SetID()
child := Package{
child := pkg.Package{
Locations: []file.Location{
file.NewVirtualLocation("/c/path", "/another/path"),
file.NewLocation("/d/path"),
},
Type: NpmPkg,
Type: pkg.NpmPkg,
}
child.SetID()
return []Package{parent, child}, nil
return []pkg.Package{parent, child}, nil
},
},
}
@ -138,8 +139,8 @@ func TestOwnershipByFilesRelationship(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
pkgs, expectedRelations := test.setup(t)
c := NewCollection(pkgs...)
relationships := RelationshipsByFileOwnership(c)
c := pkg.NewCollection(pkgs...)
relationships := findOwnershipByFileOverlapRelationship(c)
assert.Len(t, relationships, len(expectedRelations))
for idx, expectedRelationship := range expectedRelations {

View File

@ -1,9 +1,5 @@
package packages
import (
"github.com/anchore/syft/syft/cataloger/packages/java"
)
type SearchConfig struct {
IncludeIndexedArchives bool
IncludeUnindexedArchives bool
@ -15,10 +11,3 @@ func DefaultSearchConfig() SearchConfig {
IncludeUnindexedArchives: false,
}
}
// Java translates the archive search options of this configuration into the equivalent java cataloger configuration.
func (c SearchConfig) Java() java.CatalogerConfig {
	var javaConfig java.CatalogerConfig
	javaConfig.SearchIndexedArchives = c.IncludeIndexedArchives
	javaConfig.SearchUnindexedArchives = c.IncludeUnindexedArchives
	return javaConfig
}

View File

@ -2,13 +2,13 @@ package syft
import (
"crypto"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/version"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/filecontents"
"github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/version"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/cataloger/packages"
"github.com/anchore/syft/syft/source"
)
@ -18,32 +18,32 @@ type CatalogingConfig struct {
ToolVersion string
ToolConfiguration interface{}
// applies to all catalogers
Scope source.Scope
ProcessTasksInSerial bool
DefaultScope source.Scope // TODO: shouldn't this be in the package.SearchConfig?
ProcessTasksInSerial bool // TODO: this seems a little odd, if this should be an option is this the right spot?
EnabledCatalogers []cataloger.ID
availableTasks *taskCollection
// package
PackageCatalogers []pkg.Cataloger
PackageSearch packages.SearchConfig
// file metadata
CaptureFileMetadata bool
DigestHashes []crypto.Hash
// secrets
CaptureSecrets bool
SecretsConfig secrets.CatalogerConfig
SecretsSearch secrets.Config
SecretsScope source.Scope
// file classification
ClassifyFiles bool
FileClassifiers []fileclassifier.Classifier
// file contents
ContentsConfig filecontents.CatalogerConfig
ContentsSearch filecontents.Config
}
func DefaultCatalogingConfig() CatalogingConfig {
return CatalogingConfig{
Scope: source.SquashedScope,
DefaultScope: source.SquashedScope,
ToolName: internal.ApplicationName,
ToolVersion: version.Guess(),
SecretsScope: source.AllLayersScope,
SecretsConfig: secrets.DefaultCatalogerConfig(),
SecretsSearch: secrets.DefaultConfig(),
FileClassifiers: fileclassifier.DefaultClassifiers(),
ContentsConfig: filecontents.DefaultCatalogerConfig(),
ContentsSearch: filecontents.DefaultConfig(),
PackageSearch: packages.DefaultSearchConfig(),
}
}

View File

@ -2,9 +2,9 @@ package syft
import (
"crypto"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/syft/cataloger/packages"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
@ -27,7 +27,7 @@ func WithoutConcurrency() CatalogingOption {
func WithScope(scope source.Scope) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.Scope = scope
config.DefaultScope = scope
return nil
}
}
@ -47,47 +47,55 @@ func WithToolConfiguration(c interface{}) CatalogingOption {
}
}
func WithPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption {
func WithCataloger(id cataloger.ID, c pkg.Cataloger) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = catalogers
return nil
if config.availableTasks == nil {
config.availableTasks = newTaskCollection()
}
var cfg CatalogingConfig
if config != nil {
cfg = *config
}
return config.availableTasks.add(pkgCatalogerTask{
id: id,
cataloger: c,
config: cfg,
})
}
}
func WithAdditionalPackageCatalogers(catalogers ...pkg.Cataloger) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = append(config.PackageCatalogers, catalogers...)
return nil
}
}
func WithDefaultPackageCatalogers(cfg packages.SearchConfig) CatalogingOption {
func WithDefaultCatalogers() CatalogingOption {
return func(src *source.Source, config *CatalogingConfig) error {
config.PackageCatalogers = packages.CatalogersBySourceScheme(src.Metadata.Scheme, cfg)
// override any previously added catalogers
config.availableTasks = newTaskCollection()
config.EnabledCatalogers = nil
return nil
}
}
func WithFileMetadata() CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.CaptureFileMetadata = true
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileMetadataID)
return nil
}
}
func WithFileDigests(hashes ...crypto.Hash) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileDigestsID)
config.DigestHashes = hashes
return nil
}
}
func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption {
func WithSecrets(secretConfig *secrets.Config) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.CaptureSecrets = true
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.SecretsID)
if secretConfig != nil {
config.SecretsConfig = *secretConfig
config.SecretsSearch = *secretConfig
}
return nil
}
@ -95,30 +103,35 @@ func WithSecrets(secretConfig *secrets.CatalogerConfig) CatalogingOption {
func WithFileClassification() CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.ClassifyFiles = true
if len(config.FileClassifiers) > 0 {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID)
}
return nil
}
}
func WithFileClassifiers(classifiers ...fileclassifier.Classifier) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.ClassifyFiles = !(len(classifiers) > 0)
config.FileClassifiers = classifiers
if len(config.FileClassifiers) > 0 {
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileClassifierID)
}
return nil
}
}
func WithFileContents(globs ...string) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.ContentsConfig.Globs = globs
config.EnabledCatalogers = append(config.EnabledCatalogers, cataloger.FileContentsID)
config.ContentsSearch.Globs = globs
return nil
}
}
func WithFileSizeLimit(byteLimit int64) CatalogingOption {
return func(_ *source.Source, config *CatalogingConfig) error {
config.ContentsConfig.SkipFilesAboveSizeInBytes = byteLimit
config.SecretsConfig.MaxFileSize = byteLimit
config.ContentsSearch.SkipFilesAboveSizeInBytes = byteLimit
config.SecretsSearch.MaxFileSize = byteLimit
return nil
}
}

View File

@ -10,8 +10,8 @@ const (
// AppUpdateAvailable is a partybus event that occurs when an application update is available
AppUpdateAvailable partybus.EventType = "syft-app-update-available"
// PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun
PackageCatalogerStarted partybus.EventType = "syft-package-cataloger-started-event"
// CatalogingStarted is a partybus event that occurs when the first cataloger has started
CatalogingStarted partybus.EventType = "syft-cataloging-started-event"
// nolint:gosec
// SecretsCatalogerStarted is a partybus event that occurs when the secrets cataloging has begun

View File

@ -4,8 +4,8 @@ import (
"github.com/wagoodman/go-progress"
)
// PackageCatalogerMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus).
type PackageCatalogerMonitor struct {
// CatalogingMonitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus).
type CatalogingMonitor struct {
FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers
PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers
}

View File

@ -38,12 +38,12 @@ func checkEventType(actual, expected partybus.EventType) error {
return nil
}
func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.PackageCatalogerMonitor, error) {
if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil {
func ParsePackageCatalogerStarted(e partybus.Event) (*monitor.CatalogingMonitor, error) {
if err := checkEventType(e.Type, event.CatalogingStarted); err != nil {
return nil, err
}
monitor, ok := e.Value.(monitor.PackageCatalogerMonitor)
monitor, ok := e.Value.(monitor.CatalogingMonitor)
if !ok {
return nil, newPayloadErr(e.Type, "Value", e.Value)
}

View File

@ -11,7 +11,17 @@ import (
)
// Collection represents a collection of Packages.
type Collection struct {
type Collection interface {
// Size returns the total number of packages that have been added.
Size() int
// Package returns the package with the given ID (nil if no such package exists).
Package(id artifact.ID) *Package
// PackagesByPath returns all packages that were discovered from the given path.
PackagesByPath(path string) []Package
// Packages returns all packages found for the given IDs.
Packages(ids []artifact.ID) (result []Package)
// Add a package to the Collection.
Add(p Package)
// Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
Enumerate(types ...Type) <-chan Package
// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type is specified.
Sorted(types ...Type) (pkgs []Package)
}
type collection struct {
byID map[artifact.ID]Package
idsByType map[Type][]artifact.ID
idsByPath map[string][]artifact.ID // note: this is real path or virtual path
@ -19,8 +29,8 @@ type Collection struct {
}
// NewCollection returns a new empty Collection
func NewCollection(pkgs ...Package) *Collection {
catalog := Collection{
func NewCollection(pkgs ...Package) Collection {
catalog := &collection{
byID: make(map[artifact.ID]Package),
idsByType: make(map[Type][]artifact.ID),
idsByPath: make(map[string][]artifact.ID),
@ -30,16 +40,16 @@ func NewCollection(pkgs ...Package) *Collection {
catalog.Add(p)
}
return &catalog
return catalog
}
// PackageCount returns the total number of packages that have been added.
func (c *Collection) PackageCount() int {
// Size returns the total number of packages that have been added.
func (c *collection) Size() int {
return len(c.byID)
}
// Package returns the package with the given ID.
func (c *Collection) Package(id artifact.ID) *Package {
func (c *collection) Package(id artifact.ID) *Package {
v, exists := c.byID[id]
if !exists {
return nil
@ -54,12 +64,12 @@ func (c *Collection) Package(id artifact.ID) *Package {
}
// PackagesByPath returns all packages that were discovered from the given path.
func (c *Collection) PackagesByPath(path string) []Package {
func (c *collection) PackagesByPath(path string) []Package {
return c.Packages(c.idsByPath[path])
}
// Packages returns all packages for the given IDs.
func (c *Collection) Packages(ids []artifact.ID) (result []Package) {
func (c *collection) Packages(ids []artifact.ID) (result []Package) {
for _, i := range ids {
p, exists := c.byID[i]
if exists {
@ -70,7 +80,7 @@ func (c *Collection) Packages(ids []artifact.ID) (result []Package) {
}
// Add a package to the Collection.
func (c *Collection) Add(p Package) {
func (c *collection) Add(p Package) {
c.lock.Lock()
defer c.lock.Unlock()
@ -102,7 +112,7 @@ func (c *Collection) Add(p Package) {
}
// Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
func (c *Collection) Enumerate(types ...Type) <-chan Package {
func (c *collection) Enumerate(types ...Type) <-chan Package {
channel := make(chan Package)
go func() {
defer close(channel)
@ -135,9 +145,8 @@ func (c *Collection) Enumerate(types ...Type) <-chan Package {
return channel
}
// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type
// is specified.
func (c *Collection) Sorted(types ...Type) (pkgs []Package) {
// Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type is specified.
func (c *collection) Sorted(types ...Type) (pkgs []Package) {
for p := range c.Enumerate(types...) {
pkgs = append(pkgs, p)
}

View File

@ -1,8 +0,0 @@
package pkg
import "github.com/anchore/syft/syft/artifact"
// NewRelationships returns the relationships derived from the given catalog; currently this is exactly the set
// produced by RelationshipsByFileOwnership.
// TODO: as more relationships are added, this function signature will probably accommodate selection
func NewRelationships(catalog *Collection) []artifact.Relationship {
return RelationshipsByFileOwnership(catalog)
}

View File

@ -16,7 +16,7 @@ type SBOM struct {
}
type Artifacts struct {
PackageCatalog *pkg.Collection
Packages pkg.Collection
FileMetadata map[file.Coordinates]file.Metadata
FileDigests map[file.Coordinates][]file.Digest
FileClassifications map[file.Coordinates][]file.Classification

View File

@ -0,0 +1,16 @@
package speculate
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/linux"
"github.com/anchore/syft/syft/speculate/cpes"
)
// Identifiers populates the CPEs and PURL fields of the given package in place. The CPEs are assigned first, so the
// package value handed to pkg.URL already carries them. The given release is only passed through to pkg.URL.
// The package pointer is dereferenced unconditionally, so p must not be nil.
func Identifiers(p *pkg.Package, release *linux.Release) {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpes.Generate(*p)
// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(*p, release)
}

233
syft/task_collection.go Normal file
View File

@ -0,0 +1,233 @@
package syft
import (
"fmt"
"github.com/anchore/syft/syft/cataloger"
"github.com/scylladb/go-set/strset"
"sort"
"strings"
)
// Labels attached to the built-in tasks registered by addAllCatalogers; tasks can be selected by any of their labels.
const (
// packageTaskLabel is attached to every package cataloger task.
packageTaskLabel = "package"
// fileTaskLabel is attached to the file metadata, file digests, file classifier, and file contents tasks.
fileTaskLabel = "file"
// osTaskLabel is attached to the apkdb, dpkg, and rpmdb package cataloger tasks.
osTaskLabel = "os"
// languageTaskLabel is attached to the ruby, python, javascript, java, go, rust, and php package cataloger tasks.
languageTaskLabel = "language"
// installedTaskLabel is attached to package cataloger tasks such as the OS package DBs, gemspec, java archives, and go binaries.
installedTaskLabel = "installed"
// declaredTaskLabel is attached to package cataloger tasks such as lock files, requirements files, and go.mod.
declaredTaskLabel = "declared"
)
// taskCollection indexes tasks by a unique name and by the labels they were registered with.
type taskCollection struct {
taskByName map[string]task // name -> task
namesByLabel map[string][]string // label -> names of the tasks registered under that label (a task's own name is also one of its labels)
}
// newTaskCollection returns an empty task collection with both of its indexes initialized and ready for writes.
func newTaskCollection() *taskCollection {
	collection := new(taskCollection)
	collection.taskByName = make(map[string]task)
	collection.namesByLabel = make(map[string][]string)
	return collection
}
// add registers the given task under a unique name and indexes that name under each of the given labels as well as
// under the name itself. The name is the cataloger ID for package cataloger and cataloger tasks; any other task is
// named after its first label. An error is returned when no name can be determined or the name is already taken.
func (c *taskCollection) add(t task, labels ...string) error {
	var taskName string
	switch typed := t.(type) {
	case pkgCatalogerTask:
		taskName = string(typed.id)
	case catalogerTask:
		taskName = string(typed.id)
	default:
		// generic tasks carry no ID of their own, so the first label doubles as the name
		if len(labels) == 0 {
			return fmt.Errorf("no ID found for generic task")
		}
		taskName = labels[0]
	}

	if _, taken := c.taskByName[taskName]; taken {
		return fmt.Errorf("task already exists: %q", taskName)
	}
	c.taskByName[taskName] = t

	// de-duplicate the labels (the name is always one of them) so the task is indexed at most once per label
	uniqueLabels := strset.New(taskName)
	uniqueLabels.Add(labels...)
	for _, label := range uniqueLabels.List() {
		c.namesByLabel[label] = append(c.namesByLabel[label], taskName)
	}

	return nil
}
// addAllCatalogers builds every built-in task from the given configuration and adds it to the collection together
// with its labels. The table below is processed in order; the first generator or registration error aborts the
// operation and is returned. A generator that returns a nil task is skipped.
func (c *taskCollection) addAllCatalogers(config CatalogingConfig) error {
for _, d := range []struct {
generator taskGenerator
labels []string
}{
// OS package catalogers
{
generator: newAPKDBCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "alpine", "apk", "apkdb"},
},
{
generator: newDPKGCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "debian", "dpkg", "deb", "dpkgdb"},
},
{
generator: newRPMDBCatalogingTask,
labels: []string{packageTaskLabel, osTaskLabel, installedTaskLabel, "redhat", "rhel", "centos", "rpm", "rpmdb"},
},
// language package catalogers
{
generator: newRubyGemSpecCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "ruby", "gemspec", "gem"},
},
{
generator: newRubyGemFileLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "ruby", "gemfile", "gem", "gemfile.lock"},
},
{
generator: newPythonPackageCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "python", "egg", "wheel"},
},
{
generator: newPythonRequirementsCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "requirements", "requirements.txt"},
},
{
generator: newPythonPoetryCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "poetry", "poetry.lock"},
},
{
generator: newPythonSetupCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "setup", "setup.py"},
},
{
generator: newPythonPipfileCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "python", "pip", "pipfile"},
},
{
generator: newJavascriptPackageJSONCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "javascript", "node", "package.json"},
},
{
generator: newJavascriptPackageLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "package-lock.json"},
},
{
generator: newJavascriptYarnLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "javascript", "node", "yarn", "yarn.lock"},
},
{
generator: newJavaCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "java", "maven", "jar", "war", "ear", "jenkins", "hudson", "hpi", "jpi", "par", "sar", "lpkg"},
},
{
generator: newGolangModuleCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "go", "golang", "go-module", "go.mod"},
},
{
generator: newGolangBinaryCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "go", "golang", "go-module", "binary"},
},
{
generator: newRustCargoLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "rust", "cargo", "cargo.lock"},
},
{
generator: newPHPInstalledCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, installedTaskLabel, "php", "composer", "installed.json"},
},
{
generator: newPHPComposerLockCatalogingTask,
labels: []string{packageTaskLabel, languageTaskLabel, declaredTaskLabel, "php", "composer", "composer.lock"},
},
// file and secrets catalogers (note: the secrets task does not carry the file label)
{
generator: newFileMetadataCatalogingTask,
labels: []string{fileTaskLabel},
},
{
generator: newFileDigestsCatalogingTask,
labels: []string{fileTaskLabel, "digests", "digest", "file-digests"},
},
{
generator: newSecretsCatalogingTask,
labels: []string{"secrets"},
},
{
generator: newFileClassifierTask,
labels: []string{fileTaskLabel, "classifier"},
},
{
generator: newFileContentsCatalogingTask,
labels: []string{fileTaskLabel, "contents", "content", "file-contents"},
},
} {
t, err := d.generator(config)
if err != nil {
return err
}
// a generator may produce no task for the given configuration, in which case there is nothing to register
if t == nil {
continue
}
if err := c.add(t, d.labels...); err != nil {
return err
}
}
return nil
}
// query resolves a label expression to the IDs of the tasks that carry every
// label in the expression. Labels may be separated by '+', ',' or '&'; all
// three separators are equivalent and the per-label results are intersected
// by withLabels.
//
// Whitespace surrounding a label is ignored, so "package, installed" selects
// the same tasks as "package,installed". Labels that are blank after trimming
// are dropped rather than being looked up (a blank label can never match and
// would otherwise force an empty result).
func (c taskCollection) query(q string) []cataloger.ID {
	isSeparator := func(r rune) bool {
		return r == '+' || r == ',' || r == '&'
	}

	raw := strings.FieldsFunc(q, isSeparator)
	labels := make([]string, 0, len(raw))
	for _, field := range raw {
		if label := strings.TrimSpace(field); label != "" {
			labels = append(labels, label)
		}
	}
	return c.withLabels(labels...)
}
// all returns the ID of every task registered in the collection, ordered by
// the cataloger.IDs sort implementation so that results are stable despite
// map iteration order. The result is nil when no tasks are registered.
func (c taskCollection) all() []cataloger.ID {
	var ids cataloger.IDs
	for name := range c.taskByName {
		ids = append(ids, cataloger.ID(name))
	}
	sort.Sort(ids)
	return ids
}
// withLabels returns the IDs of the tasks that are registered under every one
// of the given labels (set intersection). The first label seeds the candidate
// set and each subsequent label narrows it. With no labels, or when nothing
// matches, the result is nil.
func (c taskCollection) withLabels(q ...string) []cataloger.ID {
	matched := strset.New()
	for idx, label := range q {
		names := c.namesByLabel[label]
		if idx == 0 {
			// seed the candidate set from the first label
			matched.Add(names...)
			continue
		}
		matched = strset.Intersection(matched, strset.New(names...))
	}

	var ids []cataloger.ID
	for _, name := range matched.List() {
		ids = append(ids, cataloger.ID(name))
	}

	// set iteration order is not stable, so sort before returning
	sort.Sort(cataloger.IDs(ids))
	return ids
}
// tasks looks up the task registered for each given ID and returns them in
// the order the IDs were supplied. IDs with no registered task are silently
// skipped.
func (c taskCollection) tasks(ids ...cataloger.ID) (ts []task) {
	for _, id := range ids {
		if found, ok := c.taskByName[string(id)]; ok {
			ts = append(ts, found)
		}
	}
	return ts
}

View File

@ -2,11 +2,24 @@ package syft
import (
"fmt"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/cataloger/files/fileclassifier"
"github.com/anchore/syft/syft/cataloger/files/filecontents"
"github.com/anchore/syft/syft/cataloger/files/filedigests"
"github.com/anchore/syft/syft/cataloger/files/filemetadata"
"github.com/anchore/syft/syft/cataloger/files/secrets"
"github.com/anchore/syft/syft/cataloger/packages/apkdb"
"github.com/anchore/syft/syft/cataloger/packages/deb"
"github.com/anchore/syft/syft/cataloger/packages/golang"
"github.com/anchore/syft/syft/cataloger/packages/java"
"github.com/anchore/syft/syft/cataloger/packages/javascript"
"github.com/anchore/syft/syft/cataloger/packages/php"
"github.com/anchore/syft/syft/cataloger/packages/python"
"github.com/anchore/syft/syft/cataloger/packages/rpmdb"
"github.com/anchore/syft/syft/cataloger/packages/ruby"
"github.com/anchore/syft/syft/cataloger/packages/rust"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/speculate"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloger/packages"
@ -15,152 +28,370 @@ import (
"github.com/anchore/syft/syft/source"
)
type task func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
type taskGenerator func(CatalogingConfig) (task, error)
func generatePackagesCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.PackageCatalogers) == 0 {
return nil, nil
}
// task is a single unit of cataloging work. Run is handed the SBOM artifacts
// and the source under analysis, and returns any relationships it produced
// along with an error.
type task interface {
Run(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
}
return func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
// genericTask adapts a plain function to the task interface.
type genericTask struct {
// run is the function invoked by Run.
run func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error)
}
// Run satisfies the task interface by delegating directly to the wrapped run
// function and passing its results through untouched.
func (t genericTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
	relationships, err := t.run(artifacts, src)
	return relationships, err
}
// catalogerTask is a genericTask tagged with the ID of the cataloger it runs.
type catalogerTask struct {
// id identifies the cataloger behind this task.
id cataloger.ID
genericTask
}
// pkgCatalogerTask is a task built around a single package cataloger.
type pkgCatalogerTask struct {
// id identifies the wrapped cataloger.
id cataloger.ID
// cataloger is the package cataloger this task wraps.
cataloger pkg.Cataloger
// config is the cataloging configuration the task was created with.
config CatalogingConfig
}
func (t pkgCatalogerTask) Run(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(t.config.DefaultScope)
if err != nil {
return nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err)
}
// find the distro
artifacts.LinuxDistribution = linux.IdentifyRelease(resolver)
// catalog packages
catalog, relationships, err := packages.Catalog(resolver, artifacts.LinuxDistribution, config.PackageCatalogers...)
pkgs, relationships, err := t.cataloger.Catalog(resolver)
if err != nil {
return nil, err
}
artifacts.PackageCatalog = catalog
for _, p := range pkgs {
p.FoundBy = string(t.id)
speculate.Identifiers(&p, artifacts.LinuxDistribution)
p.SetID()
artifacts.Packages.Add(p)
}
return relationships, nil
}
// newIdentifyDistroTask returns a task that identifies the linux release of
// the source and records it on artifacts.LinuxDistribution. Files are resolved
// with config.DefaultScope. The task yields no relationships, and the
// constructor itself never returns an error.
func newIdentifyDistroTask(config CatalogingConfig) (task, error) {
	identify := func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
		resolver, err := src.FileResolver(config.DefaultScope)
		if err != nil {
			return nil, fmt.Errorf("unable to determine resolver while determining linux distro: %w", err)
		}

		artifacts.LinuxDistribution = linux.IdentifyRelease(resolver)
		return nil, nil
	}

	return genericTask{run: identify}, nil
}
func generateFileMetadataCatalogingTask(config CatalogingConfig) (task, error) {
if !config.CaptureFileMetadata {
return nil, nil
}
// newAPKDBCatalogingTask returns a package cataloger task wrapping the
// cataloger from apkdb.NewApkdbCataloger, identified as cataloger.ApkDBID.
// The returned error is always nil.
func newAPKDBCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.ApkDBID,
		cataloger: apkdb.NewApkdbCataloger(),
		config:    config,
	}
	return t, nil
}
cataloger := filemetadata.NewCataloger()
// newDPKGCatalogingTask returns a package cataloger task wrapping the
// cataloger from deb.NewDpkgdbCataloger, identified as cataloger.DpkgID.
// The returned error is always nil.
func newDPKGCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.DpkgID,
		cataloger: deb.NewDpkgdbCataloger(),
		config:    config,
	}
	return t, nil
}
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
// newGolangBinaryCatalogingTask returns a package cataloger task wrapping the
// cataloger from golang.NewGoModuleBinaryCataloger, identified as
// cataloger.GoBinaryID. The returned error is always nil.
func newGolangBinaryCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.GoBinaryID,
		cataloger: golang.NewGoModuleBinaryCataloger(),
		config:    config,
	}
	return t, nil
}
// newGolangModuleCatalogingTask returns a package cataloger task wrapping the
// cataloger from golang.NewGoModFileCataloger, identified as
// cataloger.GoModID. The returned error is always nil.
func newGolangModuleCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.GoModID,
		cataloger: golang.NewGoModFileCataloger(),
		config:    config,
	}
	return t, nil
}
// newJavaCatalogingTask returns a package cataloger task wrapping the java
// cataloger, identified as cataloger.JavaArchiveID. The java cataloger's
// archive search options are taken from config.PackageSearch. The returned
// error is always nil.
func newJavaCatalogingTask(config CatalogingConfig) (task, error) {
	javaConfig := java.CatalogerConfig{
		SearchUnindexedArchives: config.PackageSearch.IncludeUnindexedArchives,
		SearchIndexedArchives:   config.PackageSearch.IncludeIndexedArchives,
	}

	t := pkgCatalogerTask{
		id:        cataloger.JavaArchiveID,
		cataloger: java.NewJavaCataloger(javaConfig),
		config:    config,
	}
	return t, nil
}
// newJavascriptPackageJSONCatalogingTask returns a package cataloger task
// wrapping the cataloger from javascript.NewJavascriptPackageCataloger,
// identified as cataloger.JavascriptPackageJSONID. The returned error is
// always nil.
func newJavascriptPackageJSONCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.JavascriptPackageJSONID,
		cataloger: javascript.NewJavascriptPackageCataloger(),
		config:    config,
	}
	return t, nil
}
// newJavascriptPackageLockCatalogingTask returns a package cataloger task
// wrapping the cataloger from javascript.NewJavascriptPackageLockCataloger,
// identified as cataloger.JavascriptPackageLockID. The returned error is
// always nil.
func newJavascriptPackageLockCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.JavascriptPackageLockID,
		cataloger: javascript.NewJavascriptPackageLockCataloger(),
		config:    config,
	}
	return t, nil
}
// newJavascriptYarnLockCatalogingTask returns a package cataloger task
// wrapping the cataloger from javascript.NewJavascriptYarnLockCataloger,
// identified as cataloger.JavaScriptYarnLockID. The returned error is always
// nil.
func newJavascriptYarnLockCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.JavaScriptYarnLockID,
		cataloger: javascript.NewJavascriptYarnLockCataloger(),
		config:    config,
	}
	return t, nil
}
// newPHPComposerLockCatalogingTask returns a package cataloger task wrapping
// the cataloger from php.NewPHPComposerLockCataloger, identified as
// cataloger.PHPComposerLockID. The returned error is always nil.
func newPHPComposerLockCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PHPComposerLockID,
		cataloger: php.NewPHPComposerLockCataloger(),
		config:    config,
	}
	return t, nil
}
// newPHPInstalledCatalogingTask returns a package cataloger task wrapping the
// cataloger from php.NewPHPComposerInstalledCataloger, identified as
// cataloger.PHPInstalledJSONID. The returned error is always nil.
func newPHPInstalledCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PHPInstalledJSONID,
		cataloger: php.NewPHPComposerInstalledCataloger(),
		config:    config,
	}
	return t, nil
}
// newPythonPackageCatalogingTask returns a package cataloger task wrapping
// the cataloger from python.NewPythonPackageCataloger, identified as
// cataloger.PythonPackageID. The returned error is always nil.
func newPythonPackageCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PythonPackageID,
		cataloger: python.NewPythonPackageCataloger(),
		config:    config,
	}
	return t, nil
}
// newPythonRequirementsCatalogingTask returns a package cataloger task
// wrapping the cataloger from python.NewPythonRequirementsCataloger,
// identified as cataloger.PythonRequirementsID. The returned error is always
// nil.
func newPythonRequirementsCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PythonRequirementsID,
		cataloger: python.NewPythonRequirementsCataloger(),
		config:    config,
	}
	return t, nil
}
// newPythonPoetryCatalogingTask returns a package cataloger task wrapping the
// cataloger from python.NewPythonPoetryCataloger, identified as
// cataloger.PythonPoetryID. The returned error is always nil.
func newPythonPoetryCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PythonPoetryID,
		cataloger: python.NewPythonPoetryCataloger(),
		config:    config,
	}
	return t, nil
}
// newPythonPipfileCatalogingTask returns a package cataloger task wrapping
// the cataloger from python.NewPythonPipfileCataloger, identified as
// cataloger.PythonPipFileID. The returned error is always nil.
func newPythonPipfileCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PythonPipFileID,
		cataloger: python.NewPythonPipfileCataloger(),
		config:    config,
	}
	return t, nil
}
// newPythonSetupCatalogingTask returns a package cataloger task wrapping the
// cataloger from python.NewPythonSetupCataloger, identified as
// cataloger.PythonSetupID. The returned error is always nil.
func newPythonSetupCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.PythonSetupID,
		cataloger: python.NewPythonSetupCataloger(),
		config:    config,
	}
	return t, nil
}
// newRPMDBCatalogingTask returns a package cataloger task wrapping the
// cataloger from rpmdb.NewRpmdbCataloger, identified as cataloger.RpmDBID.
// The returned error is always nil.
func newRPMDBCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.RpmDBID,
		cataloger: rpmdb.NewRpmdbCataloger(),
		config:    config,
	}
	return t, nil
}
// newRubyGemFileLockCatalogingTask returns a package cataloger task wrapping
// the cataloger from ruby.NewGemFileLockCataloger, identified as
// cataloger.RubyGemfileLockID. The returned error is always nil.
func newRubyGemFileLockCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.RubyGemfileLockID,
		cataloger: ruby.NewGemFileLockCataloger(),
		config:    config,
	}
	return t, nil
}
// newRubyGemSpecCatalogingTask returns a package cataloger task wrapping the
// cataloger from ruby.NewGemSpecCataloger, identified as
// cataloger.RubyGemspecID. The returned error is always nil.
func newRubyGemSpecCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.RubyGemspecID,
		cataloger: ruby.NewGemSpecCataloger(),
		config:    config,
	}
	return t, nil
}
// newRustCargoLockCatalogingTask returns a package cataloger task wrapping
// the cataloger from rust.NewCargoLockCataloger, identified as
// cataloger.RustCargoLockID. The returned error is always nil.
func newRustCargoLockCatalogingTask(config CatalogingConfig) (task, error) {
	t := pkgCatalogerTask{
		id:        cataloger.RustCargoLockID,
		cataloger: rust.NewCargoLockCataloger(),
		config:    config,
	}
	return t, nil
}
func newFileMetadataCatalogingTask(config CatalogingConfig) (task, error) {
c := filemetadata.NewCataloger()
return catalogerTask{
id: cataloger.FileMetadataID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileMetadata = result
return nil, nil
},
},
}, nil
}
func generateFileDigestsCatalogingTask(config CatalogingConfig) (task, error) {
func newFileDigestsCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.DigestHashes) == 0 {
return nil, nil
}
cataloger, err := filedigests.NewCataloger(config.DigestHashes)
c, err := filedigests.NewCataloger(config.DigestHashes)
if err != nil {
return nil, err
}
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
return catalogerTask{
id: cataloger.FileDigestsID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileDigests = result
return nil, nil
},
},
}, nil
}
func generateContentsCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.ContentsConfig.Globs) == 0 {
func newFileContentsCatalogingTask(config CatalogingConfig) (task, error) {
if len(config.ContentsSearch.Globs) == 0 {
return nil, nil
}
cataloger, err := filecontents.NewCataloger(config.ContentsConfig)
c, err := filecontents.NewCataloger(config.ContentsSearch)
if err != nil {
return nil, err
}
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
return catalogerTask{
id: cataloger.FileContentsID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileContents = result
return nil, nil
},
},
}, nil
}
func generateSecretsCatalogingTask(config CatalogingConfig) (task, error) {
if !config.CaptureSecrets {
return nil, nil
}
func newSecretsCatalogingTask(config CatalogingConfig) (task, error) {
cataloger, err := secrets.NewCataloger(config.SecretsConfig)
c, err := secrets.NewCataloger(config.SecretsSearch)
if err != nil {
return nil, err
}
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
return catalogerTask{
id: cataloger.SecretsID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.SecretsScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.Secrets = result
return nil, nil
},
},
}, nil
}
func generateFileClassifierTask(config CatalogingConfig) (task, error) {
if !config.ClassifyFiles {
return nil, nil
}
func newFileClassifierTask(config CatalogingConfig) (task, error) {
cataloger, err := fileclassifier.NewCataloger(config.FileClassifiers)
c, err := fileclassifier.NewCataloger(config.FileClassifiers)
if err != nil {
return nil, err
}
return func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.Scope)
return catalogerTask{
id: cataloger.FileClassifierID,
genericTask: genericTask{
run: func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
resolver, err := src.FileResolver(config.DefaultScope)
if err != nil {
return nil, err
}
result, err := cataloger.Catalog(resolver)
result, err := c.Catalog(resolver)
if err != nil {
return nil, err
}
results.FileClassifications = result
return nil, nil
},
},
}, nil
}
// newSynthesizePackageRelationshipsTasks returns a task that derives
// relationships from the packages already collected in artifacts.Packages by
// calling packages.FindRelationships. Files are resolved with
// config.DefaultScope; a resolver failure is returned unwrapped. The
// constructor itself never returns an error.
func newSynthesizePackageRelationshipsTasks(config CatalogingConfig) (task, error) {
	synthesize := func(artifacts *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) {
		resolver, err := src.FileResolver(config.DefaultScope)
		if err != nil {
			return nil, err
		}

		relationships := packages.FindRelationships(artifacts.Packages, resolver)
		return relationships, nil
	}

	return genericTask{run: synthesize}, nil
}

View File

@ -20,7 +20,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
imagetest.GetFixtureImage(b, "docker-archive", fixtureImageName)
tarPath := imagetest.GetFixtureImageTarPath(b, fixtureImageName)
var pc *pkg.Collection
var pc pkg.Collection
for _, c := range packages.InstalledCatalogers(packages.DefaultSearchConfig()) {
// in case of future alteration where state is persisted, assume no dependency is safe to reuse
userInput := "docker-archive:" + tarPath
@ -48,7 +48,7 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
}
})
b.Logf("catalog for %q number of packages: %d", c.Name(), pc.PackageCount())
b.Logf("catalog for %q number of packages: %d", c.Name(), pc.Size())
}
}
@ -84,7 +84,7 @@ func TestPkgCoverageImage(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
pkgCount := 0
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) {
if a.Language.String() != "" {
observedLanguages.Add(a.Language.String())
@ -112,7 +112,7 @@ func TestPkgCoverageImage(t *testing.T) {
if pkgCount != len(c.pkgInfo)+c.duplicates {
t.Logf("Discovered packages of type %+v", c.pkgType)
for a := range sbom.Artifacts.PackageCatalog.Enumerate(c.pkgType) {
for a := range sbom.Artifacts.Packages.Enumerate(c.pkgType) {
t.Log(" ", a)
}
t.Fatalf("unexpected package count: %d!=%d", pkgCount, len(c.pkgInfo))
@ -161,7 +161,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
actualPkgCount := 0
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) {
observedLanguages.Add(actualPkg.Language.String())
observedPkgs.Add(string(actualPkg.Type))
@ -186,7 +186,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
}
if actualPkgCount != len(test.pkgInfo)+test.duplicates {
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(test.pkgType) {
for actualPkg := range sbom.Artifacts.Packages.Enumerate(test.pkgType) {
t.Log(" ", actualPkg)
}
t.Fatalf("unexpected package count: %d!=%d", actualPkgCount, len(test.pkgInfo))

View File

@ -13,7 +13,7 @@ func TestNpmPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from package-lock.json in node_modules: %s", actualLocation)
@ -34,7 +34,7 @@ func TestYarnPackageLockDirectory(t *testing.T) {
foundPackages := internal.NewStringSet()
for actualPkg := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.NpmPkg) {
for actualPkg := range sbom.Artifacts.Packages.Enumerate(pkg.NpmPkg) {
for _, actualLocation := range actualPkg.Locations {
if strings.Contains(actualLocation.RealPath, "node_modules") {
t.Errorf("found packages from yarn.lock in node_modules: %s", actualLocation)

View File

@ -13,7 +13,7 @@ func TestRegression212ApkBufferSize(t *testing.T) {
expectedPkgs := 58
actualPkgs := 0
for range sbom.Artifacts.PackageCatalog.Enumerate(pkg.ApkPkg) {
for range sbom.Artifacts.Packages.Enumerate(pkg.ApkPkg) {
actualPkgs += 1
}

View File

@ -19,7 +19,7 @@ func TestRegressionGoArchDiscovery(t *testing.T) {
var actualELF, actualWIN, actualMACOS int
for p := range sbom.Artifacts.PackageCatalog.Enumerate(pkg.GoModulePkg) {
for p := range sbom.Artifacts.Packages.Enumerate(pkg.GoModulePkg) {
for _, l := range p.Locations {
switch {
case strings.Contains(l.RealPath, "elf"):

View File

@ -33,7 +33,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string) (sbom.SBOM, *sou
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Packages: pkgCatalog,
LinuxDistribution: release,
},
Relationships: relationships,
@ -69,7 +69,7 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) {
return sbom.SBOM{
Artifacts: sbom.Artifacts{
PackageCatalog: pkgCatalog,
Packages: pkgCatalog,
LinuxDistribution: release,
},
Relationships: relationships,

View File

@ -15,7 +15,7 @@ import (
"github.com/wagoodman/jotframe/pkg/frame"
)
// Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, PackageCatalogerStarted)
// Handler is an aggregated event handler for the set of supported events (PullDockerImage, ReadImage, FetchImage, CatalogingStarted)
type Handler struct {
}
@ -27,7 +27,7 @@ func NewHandler() *Handler {
// RespondsTo indicates if the handler is capable of handling the given event.
func (r *Handler) RespondsTo(event partybus.Event) bool {
switch event.Type {
case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.PackageCatalogerStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted:
case stereoscopeEvent.PullDockerImage, stereoscopeEvent.ReadImage, stereoscopeEvent.FetchImage, syftEvent.CatalogingStarted, syftEvent.SecretsCatalogerStarted, syftEvent.FileDigestsCatalogerStarted, syftEvent.FileMetadataCatalogerStarted, syftEvent.FileIndexingStarted, syftEvent.ImportStarted:
return true
default:
return false
@ -46,7 +46,7 @@ func (r *Handler) Handle(ctx context.Context, fr *frame.Frame, event partybus.Ev
case stereoscopeEvent.FetchImage:
return FetchImageHandler(ctx, fr, event, wg)
case syftEvent.PackageCatalogerStarted:
case syftEvent.CatalogingStarted:
return PackageCatalogerStartedHandler(ctx, fr, event, wg)
case syftEvent.SecretsCatalogerStarted: