syft/syft/create_sbom.go
2025-06-25 16:53:35 -04:00

199 lines
5.8 KiB
Go

package syft
import (
"context"
"fmt"
"runtime"
"sort"
"github.com/dustin/go-humanize"
"github.com/scylladb/go-set/strset"
"github.com/anchore/go-sync"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/internal/task"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
)
// CreateSBOM builds a software bill-of-materials for the given source. When cfg is nil the result of
// DefaultCreateSBOMConfig() is used in its place.
//
// The configuration is validated before any work is done. Cataloging tasks are then executed group by
// group against a file resolver obtained for the configured search scope. An error is returned when the
// configuration is invalid, when the task groups or file resolver cannot be created, when the context
// cannot be prepared, or when running any task group fails.
func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (*sbom.SBOM, error) {
	if cfg == nil {
		cfg = DefaultCreateSBOMConfig()
	}

	if validationErr := cfg.validate(); validationErr != nil {
		return nil, fmt.Errorf("invalid configuration: %w", validationErr)
	}

	description := src.Describe()

	groups, catalogerAudit, err := cfg.makeTaskGroups(description)
	if err != nil {
		return nil, err
	}

	fileResolver, err := src.FileResolver(cfg.Search.Scope)
	if err != nil {
		return nil, fmt.Errorf("unable to get file resolver: %w", err)
	}

	// snapshot of the effective configuration, recorded on the SBOM descriptor
	auditTrail := configurationAuditTrail{
		Search:         cfg.Search,
		Relationships:  cfg.Relationships,
		DataGeneration: cfg.DataGeneration,
		Packages:       cfg.Packages,
		Files:          cfg.Files,
		Licenses:       cfg.Licenses,
		Catalogers:     *catalogerAudit,
		ExtraConfigs:   cfg.ToolConfiguration,
	}

	result := &sbom.SBOM{
		Source: description,
		Descriptor: sbom.Descriptor{
			Name:          cfg.ToolName,
			Version:       cfg.ToolVersion,
			Configuration: auditTrail,
		},
		Artifacts: sbom.Artifacts{
			Packages: pkg.NewCollection(),
		},
	}

	// the context carries the shared executors and the license scanner used by the tasks
	ctx, err = setupContext(ctx, cfg)
	if err != nil {
		return nil, err
	}

	overallProgress := monitorCatalogingTask(src.ID(), groups)
	packageProgress := monitorPackageCatalogingTask()

	builder := sbomsync.NewBuilder(result, monitorPackageCount(packageProgress))

	for _, group := range groups {
		runErr := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(group), func(t task.Task) (any, error) {
			return nil, task.RunTask(ctx, t, fileResolver, builder, overallProgress)
		}, nil)
		if runErr != nil {
			// TODO: tie this to the open progress monitors...
			return nil, fmt.Errorf("failed to run tasks: %w", runErr)
		}
	}

	packageProgress.SetCompleted()
	overallProgress.SetCompleted()

	return result, nil
}
// setupContext prepares the context used while cataloging: it first installs the parallel executors
// and then, unless a license scanner is already present on the context, injects one built from
// cfg.Licenses.
func setupContext(ctx context.Context, cfg *CreateSBOMConfig) (context.Context, error) {
	ctx = setContextExecutors(ctx, cfg)

	// a scanner supplied by the caller takes precedence; only create one when none is set
	if !licenses.IsContextLicenseScannerSet(ctx) {
		return SetContextLicenseScanner(ctx, cfg.Licenses)
	}

	return ctx, nil
}
// SetContextLicenseScanner builds a default license scanner using the coverage setting from cfg and
// returns a context carrying that scanner, so that a single scanner is shared by all package
// cataloging tasks. If the scanner cannot be created, a nil context and a wrapped error are returned.
func SetContextLicenseScanner(ctx context.Context, cfg cataloging.LicenseConfig) (context.Context, error) {
	scanner, err := licenses.NewDefaultScanner(licenses.WithCoverage(cfg.Coverage))
	if err != nil {
		return nil, fmt.Errorf("could not build licenseScanner for cataloging: %w", err)
	}

	return licenses.SetContextLicenseScanner(ctx, scanner), nil
}
// setContextExecutors returns a context that has an executor registered for both the CPU and the file
// dimensions. Executors already present on the context are left untouched. The executor size is
// derived from cfg.Parallelism (treated as 0 when cfg is nil).
func setContextExecutors(ctx context.Context, cfg *CreateSBOMConfig) context.Context {
	var workers int
	if cfg != nil {
		workers = cfg.Parallelism
	}

	// The executor itself interprets its size as: 0 == serial (no goroutines), 1 == at most one
	// goroutine, negative == unbounded. The user-facing setting is mapped onto that as follows;
	// any other value (including negatives) is passed through unchanged.
	switch {
	case workers == 0:
		// unset: pick a reasonable maximum for this system
		workers = runtime.NumCPU() * 4
	case workers == 1:
		// a single worker gains nothing from goroutines, so run serially and avoid the overhead
		workers = 0
	case workers == -99:
		// special case to catch incorrect executor usage during testing
		workers = 1
	}

	// one executor per dimension we want to coordinate bounds for
	if !sync.HasContextExecutor(ctx, cataloging.ExecutorCPU) {
		ctx = sync.SetContextExecutor(ctx, cataloging.ExecutorCPU, sync.NewExecutor(workers))
	}

	if !sync.HasContextExecutor(ctx, cataloging.ExecutorFile) {
		ctx = sync.SetContextExecutor(ctx, cataloging.ExecutorFile, sync.NewExecutor(workers))
	}

	return ctx
}
// monitorPackageCount returns a callback that, given an SBOM, sets the stage text of prog to the
// SBOM's current package count (comma-formatted) followed by " packages".
func monitorPackageCount(prog *monitor.TaskProgress) func(s *sbom.SBOM) {
	return func(s *sbom.SBOM) {
		total := int64(s.Artifacts.Packages.PackageCount())
		stage := humanize.Comma(total) + " packages"
		prog.AtomicStage.Set(stage)
	}
}
// monitorPackageCatalogingTask starts the "Packages" progress task, registered as a child of the
// top-level cataloging task, and returns its progress handle.
func monitorPackageCatalogingTask() *monitor.TaskProgress {
	return bus.StartCatalogerTask(
		monitor.GenericTask{
			Title:         monitor.Title{Default: "Packages"},
			ID:            monitor.PackageCatalogingTaskID,
			ParentID:      monitor.TopLevelCatalogingTaskID,
			HideOnSuccess: false,
		},
		-1, // NOTE(review): presumably signals an unknown total size — confirm against bus.StartCatalogerTask
		"",
	)
}
// monitorCatalogingTask starts the top-level cataloging progress task for the source identified by
// srcID and returns its progress handle. The task size is the total number of tasks across all of
// the given task groups.
func monitorCatalogingTask(srcID artifact.ID, tasks [][]task.Task) *monitor.TaskProgress {
	var total int64
	for i := range tasks {
		total += int64(len(tasks[i]))
	}

	title := monitor.Title{
		Default:      "Catalog contents",
		WhileRunning: "Cataloging contents",
		OnSuccess:    "Cataloged contents",
	}

	return bus.StartCatalogerTask(
		monitor.GenericTask{
			Title:         title,
			ID:            monitor.TopLevelCatalogingTaskID,
			Context:       string(srcID),
			HideOnSuccess: false,
		},
		total,
		"",
	)
}
// formatTaskNames returns the distinct names of the given tasks in ascending order. Nil entries in
// tasks are ignored.
func formatTaskNames(tasks []task.Task) []string {
	unique := strset.New()
	for i := range tasks {
		if t := tasks[i]; t != nil {
			unique.Add(t.Name())
		}
	}

	names := unique.List()
	sort.Strings(names)

	return names
}