syft/internal/task/package_task_factory.go
Alex Goodman b0ab75fd89
Replace core SBOM-creation API with builder pattern (#1383)
* remove existing cataloging API

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add file cataloging config

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add package cataloging config

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add configs for cross-cutting concerns

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename CLI option configs to not require import aliases later

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update all nested structs for the Catalog struct

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update Catalog cli options

- add new cataloger selection options (selection and default)
- remove the excludeBinaryOverlapByOwnership
- deprecate "catalogers" flag
- add new javascript configuration

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* migrate relationship capabilities to separate internal package

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refactor golang cataloger to use configuration options when creating packages

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* create internal object to facilitate reading from and writing to an SBOM

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* create a command-like object (task) to facilitate partial SBOM creation

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add cataloger selection capability

- be able to parse string expressions into a set of resolved actions against sets
- be able to use expressions to select/add/remove tasks to/from the final set of tasks to run

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add package, file, and environment related tasks

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update existing file catalogers to use nested UI elements

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add CreateSBOMConfig that drives the SBOM creation process

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* capture SBOM creation info as a struct

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add CreateSBOM() function

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix tests

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* update docs with SBOM selection help + breaking changes

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix multiple override default inputs

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix deprecation flag printing to stdout

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* refactor cataloger selection description to separate object

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address review comments

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* keep expression errors and show specific suggestions only

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address additional review feedback

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* address more review comments

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* addressed additional PR review feedback

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix file selection references

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* remove guess language data generation option

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add tests for coordinatesForSelection

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* rename relationship attributes

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add descriptions to relationships config fields

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* improve documentation around configuration options

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add explicit errors around legacy config entries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2024-01-12 17:39:13 -05:00

208 lines
6.2 KiB
Go

package task
import (
"context"
"fmt"
"sort"
"strings"
"unicode"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
type packageTaskFactory func(cfg CatalogingFactoryConfig) Task
type PackageTaskFactories []packageTaskFactory
type CatalogingFactoryConfig struct {
SearchConfig cataloging.SearchConfig
RelationshipsConfig cataloging.RelationshipsConfig
DataGenerationConfig cataloging.DataGenerationConfig
PackagesConfig pkgcataloging.Config
}
func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig {
return CatalogingFactoryConfig{
SearchConfig: cataloging.DefaultSearchConfig(),
RelationshipsConfig: cataloging.DefaultRelationshipsConfig(),
DataGenerationConfig: cataloging.DefaultDataGenerationConfig(),
PackagesConfig: pkgcataloging.DefaultConfig(),
}
}
func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) packageTaskFactory {
return func(cfg CatalogingFactoryConfig) Task {
return NewPackageTask(cfg, catalogerFactory(cfg), tags...)
}
}
func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) packageTaskFactory {
return func(cfg CatalogingFactoryConfig) Task {
return NewPackageTask(cfg, catalogerFactory(), tags...)
}
}
func (f PackageTaskFactories) Tasks(cfg CatalogingFactoryConfig) ([]Task, error) {
var allTasks []Task
taskNames := strset.New()
duplicateTaskNames := strset.New()
var err error
for _, factory := range f {
tsk := factory(cfg)
if tsk == nil {
continue
}
tskName := tsk.Name()
if taskNames.Has(tskName) {
duplicateTaskNames.Add(tskName)
}
allTasks = append(allTasks, tsk)
taskNames.Add(tskName)
}
if duplicateTaskNames.Size() > 0 {
names := duplicateTaskNames.List()
sort.Strings(names)
err = fmt.Errorf("duplicate cataloger task names: %v", strings.Join(names, ", "))
}
return allTasks, err
}
// NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options.
//
//nolint:funlen
func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task {
fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error {
catalogerName := c.Name()
log.WithFields("name", catalogerName).Trace("starting package cataloger")
info := monitor.GenericTask{
Title: monitor.Title{
Default: prettyName(catalogerName),
},
ID: catalogerName,
ParentID: monitor.PackageCatalogingTaskID,
Context: "",
HideOnSuccess: true,
}
t := bus.StartCatalogerTask(info, -1, "")
pkgs, relationships, err := c.Catalog(resolver)
if err != nil {
return fmt.Errorf("unable to catalog packages with %q: %w", c.Name(), err)
}
log.WithFields("cataloger", c.Name()).Debugf("discovered %d packages", len(pkgs))
for i, p := range pkgs {
if cfg.DataGenerationConfig.GenerateCPEs {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
// we might have binary classified CPE already with the package so we want to append here
dictionaryCPE, ok := cpe.DictionaryFind(p)
if ok {
log.Tracef("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString())
p.CPEs = append(p.CPEs, dictionaryCPE)
} else {
p.CPEs = append(p.CPEs, cpe.Generate(p)...)
}
}
// if we were not able to identify the language we have an opportunity
// to try and get this value from the PURL. Worst case we assert that
// we could not identify the language at either stage and set UnknownLanguage
if p.Language == "" {
p.Language = pkg.LanguageFromPURL(p.PURL)
}
if cfg.RelationshipsConfig.PackageFileOwnership {
// create file-to-package relationships for files owned by the package
owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
if err != nil {
log.Warnf("unable to create any package-file relationships for package name=%q type=%q: %w", p.Name, p.Type, err)
} else {
relationships = append(relationships, owningRelationships...)
}
}
pkgs[i] = p
}
sbom.AddPackages(pkgs...)
sbom.AddRelationships(relationships...)
t.Add(int64(len(pkgs)))
t.SetCompleted()
log.WithFields("name", c.Name()).Trace("package cataloger completed")
return nil
}
tags = append(tags, pkgcataloging.PackageTag)
return NewTask(c.Name(), fn, tags...)
}
func prettyName(s string) string {
if s == "" {
return ""
}
// Convert first character to uppercase
r := []rune(s)
r[0] = unicode.ToUpper(r[0])
return strings.ReplaceAll(string(r), "-", " ")
}
func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
fileOwner, ok := p.Metadata.(pkg.FileOwner)
if !ok {
return nil, nil
}
locations := map[artifact.ID]file.Location{}
for _, path := range fileOwner.OwnedFiles() {
pathRefs, err := resolver.FilesByPath(path)
if err != nil {
return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err)
}
if len(pathRefs) == 0 {
// ideally we want to warn users about missing files from a package, however, it is very common for
// container image authors to delete files that are not needed in order to keep image sizes small. Adding
// a warning here would be needlessly noisy (even for popular base images).
continue
}
for _, ref := range pathRefs {
if oldRef, ok := locations[ref.Coordinates.ID()]; ok {
log.Debugf("found path duplicate of %s", oldRef.RealPath)
}
locations[ref.Coordinates.ID()] = ref
}
}
var relationships []artifact.Relationship
for _, location := range locations {
relationships = append(relationships, artifact.Relationship{
From: p,
To: location.Coordinates,
Type: artifact.ContainsRelationship,
})
}
return relationships, nil
}