Alex Goodman a05608a4c8 wire up cli config
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-12-10 12:53:41 -05:00

388 lines
13 KiB
Go

package executable
import (
"bytes"
"context"
"debug/elf"
"debug/macho"
"encoding/binary"
"fmt"
"sort"
"github.com/bmatcuk/doublestar/v4"
"github.com/dustin/go-humanize"
"github.com/anchore/go-sync"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/mimetype"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/event/monitor"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader"
)
// SymbolCaptureScope names a category of executables from which symbols should be captured. For the time being only
// golang binaries are supported; in the future this can be expanded to include rust audit binaries, libraries only,
// applications only, or all binaries.
type SymbolCaptureScope string

const (
	// SymbolScopeGolang selects only binaries built with the golang toolchain.
	SymbolScopeGolang SymbolCaptureScope = "golang"
)
// Config controls which files the executable cataloger inspects and what it extracts from them.
type Config struct {
	// MIMETypes lists the MIME types used to select candidate files for executable cataloging.
	MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"`

	// Globs optionally narrows the candidates to files whose path matches at least one of these glob patterns.
	// When empty, no glob filtering is applied.
	Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"`

	// Symbols holds the symbol extraction settings.
	Symbols SymbolConfig `json:"symbols" yaml:"symbols" mapstructure:"symbols"`
}
// SymbolConfig holds settings related to symbol capturing from executables.
type SymbolConfig struct {
	// CaptureScope lists the kinds of executables to capture symbols from. SymbolScopeGolang is currently the only
	// value accepted by Validate; leaving the list empty (the default) means no symbols are captured.
	CaptureScope []SymbolCaptureScope `json:"capture" yaml:"capture" mapstructure:"capture"`

	// Types are the types of Go symbols to capture, relative to `go tool nm` output (e.g. T, t, R, r, D, d, B, b, C, U, etc).
	// If empty, all symbol types are captured.
	Types []string `json:"types" yaml:"types" mapstructure:"types"`

	// Go configures Go-specific symbol capturing settings.
	Go GoSymbolConfig `json:"go" yaml:"go" mapstructure:"go"`
}
// GoSymbolConfig groups the switches that decide which symbols are kept when capturing from binaries built with the
// golang toolchain.
type GoSymbolConfig struct {
	// StandardLibrary enables capturing symbols that belong to the Go standard library (e.g. "fmt", "net/http", etc).
	StandardLibrary bool `json:"standard-library" yaml:"standard-library" mapstructure:"standard-library"`

	// ExtendedStandardLibrary enables capturing symbols from the extended standard library (e.g. "golang.org/x/net", etc).
	ExtendedStandardLibrary bool `json:"extended-standard-library" yaml:"extended-standard-library" mapstructure:"extended-standard-library"`

	// ThirdPartyModules enables capturing symbols that come from third-party modules (e.g. github.com/spf13/cobra, etc).
	ThirdPartyModules bool `json:"third-party-modules" yaml:"third-party-modules" mapstructure:"third-party-modules"`

	// NormalizeVendoredModules strips the "vendor/" prefix from vendored module paths when capturing third-party
	// module symbols.
	NormalizeVendoredModules bool `json:"normalize-vendored-modules" yaml:"normalize-vendored-modules" mapstructure:"normalize-vendored-modules"`

	// TypeEqualityFunctions enables capturing type equality functions (e.g. "type..eq..T1..T2"). These are generated
	// by the Go compiler rather than written by users.
	TypeEqualityFunctions bool `json:"type-equality-functions" yaml:"type-equality-functions" mapstructure:"type-equality-functions"`

	// GCShapeStencils enables capturing GC shape stencil functions (e.g. "go.shape.*"). These relate to how generics
	// are implemented and are not user defined or directly callable.
	GCShapeStencils bool `json:"gc-shape-stencils" yaml:"gc-shape-stencils" mapstructure:"gc-shape-stencils"`

	// ExportedSymbols enables capturing exported (public/global) symbols from Go binaries.
	ExportedSymbols bool `json:"exported-symbols" yaml:"exported-symbols" mapstructure:"exported-symbols"`

	// UnexportedSymbols enables capturing unexported (private/local) symbols from Go binaries.
	UnexportedSymbols bool `json:"unexported-symbols" yaml:"unexported-symbols" mapstructure:"unexported-symbols"`
}
// Validate reports the first logical inconsistency found in the configuration, or nil when there is none.
// The symbol settings are the only part of the configuration that is checked here.
func (c Config) Validate() error {
	symbols := c.Symbols
	return symbols.Validate()
}
// Validate reports the first logical inconsistency found in the symbol capture settings, or nil when there is none.
//
// Capture scopes and NM types are always checked. The Go-specific checks only run when the golang scope is among the
// configured capture scopes.
func (s SymbolConfig) Validate() error {
	// every configured capture scope must be a known one
	for _, candidate := range s.CaptureScope {
		if isValidCaptureScope(candidate) {
			continue
		}
		return fmt.Errorf("invalid symbol capture scope %q: valid values are %q", candidate, SymbolScopeGolang)
	}

	// every configured NM type must be a known one (ranging over an empty list is a no-op)
	for _, nmType := range s.Types {
		if isValidNMType(nmType) {
			continue
		}
		return fmt.Errorf("invalid NM type %q: valid values are %v", nmType, validNMTypes())
	}

	goCfg := s.Go
	switch {
	case !s.hasGolangScope():
		// the remaining checks only apply when Go symbol capture is enabled
		return nil
	case !(goCfg.ExportedSymbols || goCfg.UnexportedSymbols):
		// at least one of exported/unexported must be enabled
		return fmt.Errorf("both exported-symbols and unexported-symbols are disabled; no Go symbols would be captured")
	case !(goCfg.StandardLibrary || goCfg.ExtendedStandardLibrary || goCfg.ThirdPartyModules):
		// at least one module source must be enabled
		return fmt.Errorf("all module sources (standard-library, extended-standard-library, third-party-modules) are disabled; no meaningful Go symbols would be captured")
	}

	return nil
}
// hasGolangScope reports whether SymbolScopeGolang is among the configured capture scopes.
func (s SymbolConfig) hasGolangScope() bool {
	for idx := range s.CaptureScope {
		if s.CaptureScope[idx] == SymbolScopeGolang {
			return true
		}
	}

	return false
}
// isValidCaptureScope reports whether scope is one of the capture scopes this package recognizes.
func isValidCaptureScope(scope SymbolCaptureScope) bool {
	valid := false

	switch scope { //nolint:gocritic // lets elect a pattern as if we'll have multiple options in the future...
	case SymbolScopeGolang:
		valid = true
	}

	return valid
}
// Cataloger collects executable metadata for the files selected by its configuration.
type Cataloger struct {
	// config is the configuration the cataloger was constructed with.
	config Config
}
// DefaultConfig returns the configuration used when nothing is explicitly configured: every executable MIME type is
// considered, no glob filtering is applied, and no symbols are captured.
func DefaultConfig() Config {
	// sort so the default MIME type list has a deterministic order
	mimeTypes := mimetype.ExecutableMIMETypeSet.List()
	sort.Strings(mimeTypes)

	goSymbols := GoSymbolConfig{
		StandardLibrary:          true,
		ExtendedStandardLibrary:  true,
		ThirdPartyModules:        true,
		NormalizeVendoredModules: true,
		ExportedSymbols:          true,
		// the following are off by default...
		TypeEqualityFunctions: false, // capturing these adds a lot of noise and has arguably little value
		GCShapeStencils:       false, // capturing these adds a lot of noise and has arguably little value
		UnexportedSymbols:     false, // vulnerabilities tend to track only exported symbols
	}

	symbols := SymbolConfig{
		// important! by default we do not capture any symbols unless explicitly configured
		CaptureScope: []SymbolCaptureScope{},
		// by default only capture "T" (text/code) symbols, since vulnerability data tracks accessible function symbols
		Types: []string{"T"},
		Go:    goSymbols,
	}

	return Config{
		MIMETypes: mimeTypes,
		Globs:     nil,
		Symbols:   symbols,
	}
}
// NewCataloger returns a Cataloger that uses cfg as its configuration.
func NewCataloger(cfg Config) *Cataloger {
	cataloger := Cataloger{config: cfg}
	return &cataloger
}
// Catalog is CatalogCtx invoked with a background context.
func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
	ctx := context.Background()
	return i.CatalogCtx(ctx, resolver)
}
// CatalogCtx finds the files matching the configured MIME types (narrowed by the configured globs, if any), processes
// each one as an executable, and returns the results keyed by file coordinates.
//
// A failure to list or filter the candidate files aborts the run with a nil map. Per-file failures are attached to
// their location and reported through the returned error, alongside whatever results were gathered.
func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
	candidates, err := resolver.FilesByMIMEType(i.config.MIMETypes...)
	if err != nil {
		return nil, fmt.Errorf("unable to get file locations for binaries: %w", err)
	}

	if candidates, err = filterByGlobs(candidates, i.config.Globs); err != nil {
		return nil, err
	}

	prog := catalogingProgress(int64(len(candidates)))
	results := make(map[file.Coordinates]file.Executable)

	// process handles a single location; any failure is tied to that location before being handed back
	process := func(loc file.Location) (*file.Executable, error) {
		prog.AtomicStage.Set(loc.Path())

		exec, procErr := i.processExecutableLocation(loc, resolver)
		if procErr == nil {
			return exec, nil
		}
		return exec, unknown.New(loc, procErr)
	}

	// collect records a processed location; locations that produced no executable are neither counted nor stored
	collect := func(loc file.Location, exec *file.Executable) {
		if exec == nil {
			return
		}
		prog.Increment()
		results[loc.Coordinates] = *exec
	}

	errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(candidates), process, collect)

	log.Debugf("executable cataloger processed %d files", len(results))

	prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current())))
	prog.SetCompleted()

	return results, errs
}
// processExecutableLocation opens the file at loc through resolver, adapts its contents to a union reader, and hands
// it to processExecutable. The opened contents are closed (with any close error logged) before returning.
func (i *Cataloger) processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) {
	contents, openErr := resolver.FileContentsByLocation(loc)
	if openErr != nil {
		log.WithFields("error", openErr, "path", loc.RealPath).Debug("unable to get file contents")
		return nil, fmt.Errorf("unable to get file contents: %w", openErr)
	}
	defer internal.CloseAndLogError(contents, loc.RealPath)

	union, adaptErr := unionreader.GetUnionReader(contents)
	if adaptErr != nil {
		log.WithFields("error", adaptErr, "path", loc.RealPath).Debug("unable to get union reader")
		return nil, fmt.Errorf("unable to get union reader: %w", adaptErr)
	}

	return i.processExecutable(loc, union)
}
// processExecutable detects the executable format of reader and gathers the format-specific features for it.
//
// It returns (nil, error) when the format cannot be determined due to a read failure, (nil, nil) when the content is
// not a recognized executable format, and otherwise the gathered data. When feature extraction fails, the data is
// still returned together with the wrapped error.
func (i *Cataloger) processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) {
	// determine the executable format
	format, err := findExecutableFormat(reader)
	if err != nil {
		log.Debugf("unable to determine executable kind for %v: %v", loc.RealPath, err)
		return nil, fmt.Errorf("unable to determine executable kind: %w", err)
	}

	if format == "" {
		// this is not an "unknown", so just log -- this binary does not have parseable data in it
		log.Debugf("unable to determine executable format for %q", loc.RealPath)
		return nil, nil
	}

	data := file.Executable{Format: format}

	// report logs a feature extraction failure at trace level and wraps it with msg; a nil error passes through
	report := func(featErr error, msg string) error {
		if featErr == nil {
			return nil
		}
		log.WithFields("error", featErr, "path", loc.RealPath).Trace(msg)
		return fmt.Errorf("%s: %w", msg, featErr)
	}

	switch format {
	case file.ELF:
		err = report(findELFFeatures(&data, reader, i.config.Symbols), "unable to determine ELF features")
	case file.PE:
		err = report(findPEFeatures(&data, reader), "unable to determine PE features")
	case file.MachO:
		err = report(findMachoFeatures(&data, reader, i.config.Symbols), "unable to determine Macho features")
	}

	// always allocate collections for presentation
	if data.ImportedLibraries == nil {
		data.ImportedLibraries = []string{}
	}

	return &data, err
}
// catalogingProgress starts the "Executables" cataloger task on the bus, sized to the given number of locations, and
// returns its progress tracker.
func catalogingProgress(locations int64) *monitor.TaskProgress {
	return bus.StartCatalogerTask(
		monitor.GenericTask{
			Title:    monitor.Title{Default: "Executables"},
			ParentID: monitor.TopLevelCatalogingTaskID,
		},
		locations,
		"",
	)
}
// filterByGlobs returns the locations that match at least one of the given glob patterns. When no globs are given the
// input is returned untouched. The first matching error aborts the filtering.
func filterByGlobs(locs []file.Location, globs []string) ([]file.Location, error) {
	if len(globs) == 0 {
		return locs, nil
	}

	var kept []file.Location
	for idx := range locs {
		ok, err := locationMatchesGlob(locs[idx], globs)
		switch {
		case err != nil:
			return nil, err
		case ok:
			kept = append(kept, locs[idx])
		}
	}

	return kept, nil
}
// locationMatchesGlob reports whether either the real path or the access path of loc matches any of the given glob
// patterns. Empty paths are skipped. An error from the glob matcher is returned immediately.
func locationMatchesGlob(loc file.Location, globs []string) (bool, error) {
	candidates := [...]string{loc.RealPath, loc.AccessPath}

	for _, pattern := range globs {
		for _, candidate := range candidates {
			if candidate == "" {
				continue
			}

			ok, err := doublestar.Match(pattern, candidate)
			switch {
			case err != nil:
				return false, fmt.Errorf("unable to match glob %q to path %q: %w", pattern, candidate, err)
			case ok:
				return true, nil
			}
		}
	}

	return false, nil
}
// findExecutableFormat sniffs the first 512 bytes of reader and returns the executable format they indicate.
//
// It returns an empty format with a nil error when the bytes match none of the known formats, and an error when the
// full 512 bytes cannot be read.
func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) {
	// read the first sector of the file
	const sectorSize = 512
	buf := make([]byte, sectorSize)

	n, err := reader.ReadAt(buf, 0)
	if n < len(buf) {
		if err != nil {
			return "", fmt.Errorf("unable to read first sector of file: %w", err)
		}
		return "", fmt.Errorf("unable to read enough bytes to determine executable format")
	}
	// The buffer is full, which is all the sniffing below needs, so an error reported alongside a complete read is
	// deliberately not treated as a failure: an io.ReaderAt may return io.EOF together with n == len(p) when the read
	// ends exactly at the end of the input.
	// NOTE(review): assumes UnionReader.ReadAt follows the io.ReaderAt contract -- confirm.

	switch {
	case isMacho(buf):
		return file.MachO, nil
	case isPE(buf):
		return file.PE, nil
	case isELF(buf):
		return file.ELF, nil
	}

	return "", nil
}
// isMacho reports whether by starts like a Mach-O binary: either the shared 0xCAFEBABE magic with a byte at
// position 7 below 20, or one of the 32/64-bit Mach-O magic numbers in either byte order.
func isMacho(by []byte) bool {
	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
	if classOrMachOFat(by) {
		// classOrMachOFat only succeeds with at least 8 bytes, so reading index 7 is safe
		if by[7] < 20 {
			return true
		}
	}

	if len(by) < 4 {
		return false
	}

	for _, order := range []binary.ByteOrder{binary.BigEndian, binary.LittleEndian} {
		switch order.Uint32(by) {
		case macho.Magic32, macho.Magic64:
			return true
		}
	}

	return false
}

// classOrMachOFat reports whether in is at least 8 bytes long and starts with the 0xCAFEBABE magic number.
//
// Java bytecode and Mach-O binaries share the same magic number.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
	// There should be at least 8 bytes for both of them because the only way to
	// quickly distinguish them is by comparing byte at position 7
	if len(in) < 8 {
		return false
	}

	magic := [...]byte{0xCA, 0xFE, 0xBA, 0xBE}
	return bytes.Equal(in[:len(magic)], magic[:])
}
// isPE reports whether by starts with the two bytes "MZ".
func isPE(by []byte) bool {
	signature := []byte("MZ")
	if len(by) < len(signature) {
		return false
	}

	return bytes.Equal(by[:len(signature)], signature)
}
// isELF reports whether by starts with the ELF magic number (elf.ELFMAG).
func isELF(by []byte) bool {
	magic := []byte(elf.ELFMAG)
	if len(by) < len(magic) {
		return false
	}

	return bytes.Equal(by[:len(magic)], magic)
}