mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 10:36:45 +01:00
388 lines
13 KiB
Go
388 lines
13 KiB
Go
package executable
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"debug/elf"
|
|
"debug/macho"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"sort"
|
|
|
|
"github.com/bmatcuk/doublestar/v4"
|
|
"github.com/dustin/go-humanize"
|
|
|
|
"github.com/anchore/go-sync"
|
|
"github.com/anchore/syft/internal"
|
|
"github.com/anchore/syft/internal/bus"
|
|
"github.com/anchore/syft/internal/log"
|
|
"github.com/anchore/syft/internal/mimetype"
|
|
"github.com/anchore/syft/internal/unknown"
|
|
"github.com/anchore/syft/syft/cataloging"
|
|
"github.com/anchore/syft/syft/event/monitor"
|
|
"github.com/anchore/syft/syft/file"
|
|
"github.com/anchore/syft/syft/internal/unionreader"
|
|
)
|
|
|
|
// SymbolCaptureScope names a class of executables from which symbols should be captured.
// For now only golang binaries are supported; the type leaves room for further scopes in the
// future (rust audit binaries, libraries only, applications only, or all binaries).
type SymbolCaptureScope string

const (
	// SymbolScopeGolang selects only binaries built with the golang toolchain.
	SymbolScopeGolang SymbolCaptureScope = "golang"
)
|
|
|
|
type Config struct {
|
|
// MIMETypes are the MIME types that will be considered for executable cataloging.
|
|
MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"`
|
|
|
|
// Globs are the glob patterns that will be used to filter which files are cataloged.
|
|
Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"`
|
|
|
|
// Symbols configures symbol extraction settings.
|
|
Symbols SymbolConfig `json:"symbols" yaml:"symbols" mapstructure:"symbols"`
|
|
}
|
|
|
|
// SymbolConfig holds settings related to symbol capturing from executables.
|
|
type SymbolConfig struct {
|
|
// CaptureScope defines the scope of symbols to capture from executables (all binaries, libraries only, applications only, golang binaries only, or none).
|
|
CaptureScope []SymbolCaptureScope `json:"capture" yaml:"capture" mapstructure:"capture"`
|
|
|
|
// Types are the types of Go symbols to capture, relative to `go tool nm` output (e.g. T, t, R, r, D, d, B, b, C, U, etc).
|
|
// If empty, all symbol types are captured.
|
|
Types []string
|
|
|
|
// Go configures Go-specific symbol capturing settings.
|
|
Go GoSymbolConfig `json:"go" yaml:"go" mapstructure:"go"`
|
|
}
|
|
|
|
// GoSymbolConfig groups the switches that control which symbols are captured from binaries
// built with the golang toolchain.
type GoSymbolConfig struct {
	// StandardLibrary enables capture of Go standard library symbols (e.g. "fmt", "net/http", etc).
	StandardLibrary bool `json:"standard-library" yaml:"standard-library" mapstructure:"standard-library"`

	// ExtendedStandardLibrary enables capture of extended Go standard library symbols (e.g. "golang.org/x/net", etc).
	ExtendedStandardLibrary bool `json:"extended-standard-library" yaml:"extended-standard-library" mapstructure:"extended-standard-library"`

	// ThirdPartyModules enables capture of third-party module symbols (e.g. github.com/spf13/cobra, etc).
	ThirdPartyModules bool `json:"third-party-modules" yaml:"third-party-modules" mapstructure:"third-party-modules"`

	// NormalizeVendoredModules enables stripping the "vendor/" prefix from vendored module paths
	// when capturing third-party module symbols.
	NormalizeVendoredModules bool `json:"normalize-vendored-modules" yaml:"normalize-vendored-modules" mapstructure:"normalize-vendored-modules"`

	// TypeEqualityFunctions enables capture of type equality functions (e.g. "type..eq..T1..T2").
	// These are automatically generated by the Go compiler for generic types.
	TypeEqualityFunctions bool `json:"type-equality-functions" yaml:"type-equality-functions" mapstructure:"type-equality-functions"`

	// GCShapeStencils enables capture of GC shape stencil functions (e.g. "go.shape.*"). These relate
	// to how generics are implemented and are not user defined or directly callable.
	GCShapeStencils bool `json:"gc-shape-stencils" yaml:"gc-shape-stencils" mapstructure:"gc-shape-stencils"`

	// ExportedSymbols enables capture of exported (public/global) symbols from Go binaries.
	ExportedSymbols bool `json:"exported-symbols" yaml:"exported-symbols" mapstructure:"exported-symbols"`

	// UnexportedSymbols enables capture of unexported (private/local) symbols from Go binaries.
	UnexportedSymbols bool `json:"unexported-symbols" yaml:"unexported-symbols" mapstructure:"unexported-symbols"`
}
|
|
|
|
// Validate checks for logical configuration inconsistencies and returns an error if any are found.
|
|
func (c Config) Validate() error {
|
|
return c.Symbols.Validate()
|
|
}
|
|
|
|
// Validate checks for logical configuration inconsistencies in symbol capture settings.
|
|
func (s SymbolConfig) Validate() error {
|
|
// validate that all CaptureScope values are valid
|
|
for _, scope := range s.CaptureScope {
|
|
if !isValidCaptureScope(scope) {
|
|
return fmt.Errorf("invalid symbol capture scope %q: valid values are %q", scope, SymbolScopeGolang)
|
|
}
|
|
}
|
|
|
|
// validate NM types if specified
|
|
if len(s.Types) > 0 {
|
|
for _, t := range s.Types {
|
|
if !isValidNMType(t) {
|
|
return fmt.Errorf("invalid NM type %q: valid values are %v", t, validNMTypes())
|
|
}
|
|
}
|
|
}
|
|
|
|
// remaining validations only apply when Go symbol capture is enabled
|
|
if !s.hasGolangScope() {
|
|
return nil
|
|
}
|
|
|
|
// if Go symbol capture is enabled, at least one of exported/unexported must be true
|
|
if !s.Go.ExportedSymbols && !s.Go.UnexportedSymbols {
|
|
return fmt.Errorf("both exported-symbols and unexported-symbols are disabled; no Go symbols would be captured")
|
|
}
|
|
|
|
// if Go symbol capture is enabled, at least one module source must be enabled
|
|
if !s.Go.StandardLibrary && !s.Go.ExtendedStandardLibrary && !s.Go.ThirdPartyModules {
|
|
return fmt.Errorf("all module sources (standard-library, extended-standard-library, third-party-modules) are disabled; no meaningful Go symbols would be captured")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s SymbolConfig) hasGolangScope() bool {
|
|
for _, scope := range s.CaptureScope {
|
|
if scope == SymbolScopeGolang {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isValidCaptureScope(scope SymbolCaptureScope) bool {
|
|
switch scope { //nolint:gocritic // lets elect a pattern as if we'll have multiple options in the future...
|
|
case SymbolScopeGolang:
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
type Cataloger struct {
|
|
config Config
|
|
}
|
|
|
|
func DefaultConfig() Config {
|
|
m := mimetype.ExecutableMIMETypeSet.List()
|
|
sort.Strings(m)
|
|
return Config{
|
|
MIMETypes: m,
|
|
Globs: nil,
|
|
Symbols: SymbolConfig{
|
|
CaptureScope: []SymbolCaptureScope{}, // important! by default we do not capture any symbols unless explicitly configured
|
|
Types: []string{"T"}, // by default only capture "T" (text/code) symbols, since vulnerability data tracks accessible function symbols
|
|
Go: GoSymbolConfig{
|
|
StandardLibrary: true,
|
|
ExtendedStandardLibrary: true,
|
|
ThirdPartyModules: true,
|
|
NormalizeVendoredModules: true,
|
|
ExportedSymbols: true,
|
|
TypeEqualityFunctions: false, // capturing this adds a lot of noise and have arguably little value
|
|
GCShapeStencils: false, // capturing this adds a lot of noise and have arguably little value
|
|
UnexportedSymbols: false, // vulnerabilities tend to track only exported symbols
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func NewCataloger(cfg Config) *Cataloger {
|
|
return &Cataloger{
|
|
config: cfg,
|
|
}
|
|
}
|
|
|
|
func (i *Cataloger) Catalog(resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
|
|
return i.CatalogCtx(context.Background(), resolver)
|
|
}
|
|
|
|
func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map[file.Coordinates]file.Executable, error) {
|
|
locs, err := resolver.FilesByMIMEType(i.config.MIMETypes...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to get file locations for binaries: %w", err)
|
|
}
|
|
|
|
locs, err = filterByGlobs(locs, i.config.Globs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
prog := catalogingProgress(int64(len(locs)))
|
|
|
|
results := make(map[file.Coordinates]file.Executable)
|
|
errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locs), func(loc file.Location) (*file.Executable, error) {
|
|
prog.AtomicStage.Set(loc.Path())
|
|
|
|
exec, err := i.processExecutableLocation(loc, resolver)
|
|
if err != nil {
|
|
err = unknown.New(loc, err)
|
|
}
|
|
return exec, err
|
|
}, func(loc file.Location, exec *file.Executable) {
|
|
if exec != nil {
|
|
prog.Increment()
|
|
results[loc.Coordinates] = *exec
|
|
}
|
|
})
|
|
|
|
log.Debugf("executable cataloger processed %d files", len(results))
|
|
|
|
prog.AtomicStage.Set(fmt.Sprintf("%s executables", humanize.Comma(prog.Current())))
|
|
prog.SetCompleted()
|
|
|
|
return results, errs
|
|
}
|
|
|
|
func (i *Cataloger) processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) {
|
|
reader, err := resolver.FileContentsByLocation(loc)
|
|
if err != nil {
|
|
log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get file contents")
|
|
return nil, fmt.Errorf("unable to get file contents: %w", err)
|
|
}
|
|
defer internal.CloseAndLogError(reader, loc.RealPath)
|
|
|
|
uReader, err := unionreader.GetUnionReader(reader)
|
|
if err != nil {
|
|
log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get union reader")
|
|
return nil, fmt.Errorf("unable to get union reader: %w", err)
|
|
}
|
|
|
|
return i.processExecutable(loc, uReader)
|
|
}
|
|
|
|
func (i *Cataloger) processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) {
|
|
data := file.Executable{}
|
|
|
|
// determine the executable format
|
|
|
|
format, err := findExecutableFormat(reader)
|
|
if err != nil {
|
|
log.Debugf("unable to determine executable kind for %v: %v", loc.RealPath, err)
|
|
return nil, fmt.Errorf("unable to determine executable kind: %w", err)
|
|
}
|
|
|
|
if format == "" {
|
|
// this is not an "unknown", so just log -- this binary does not have parseable data in it
|
|
log.Debugf("unable to determine executable format for %q", loc.RealPath)
|
|
return nil, nil
|
|
}
|
|
|
|
data.Format = format
|
|
|
|
switch format {
|
|
case file.ELF:
|
|
if err = findELFFeatures(&data, reader, i.config.Symbols); err != nil {
|
|
log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine ELF features")
|
|
err = fmt.Errorf("unable to determine ELF features: %w", err)
|
|
}
|
|
case file.PE:
|
|
if err = findPEFeatures(&data, reader); err != nil {
|
|
log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features")
|
|
err = fmt.Errorf("unable to determine PE features: %w", err)
|
|
}
|
|
case file.MachO:
|
|
if err = findMachoFeatures(&data, reader, i.config.Symbols); err != nil {
|
|
log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine Macho features")
|
|
err = fmt.Errorf("unable to determine Macho features: %w", err)
|
|
}
|
|
}
|
|
|
|
// always allocate collections for presentation
|
|
if data.ImportedLibraries == nil {
|
|
data.ImportedLibraries = []string{}
|
|
}
|
|
|
|
return &data, err
|
|
}
|
|
|
|
func catalogingProgress(locations int64) *monitor.TaskProgress {
|
|
info := monitor.GenericTask{
|
|
Title: monitor.Title{
|
|
Default: "Executables",
|
|
},
|
|
ParentID: monitor.TopLevelCatalogingTaskID,
|
|
}
|
|
|
|
return bus.StartCatalogerTask(info, locations, "")
|
|
}
|
|
|
|
func filterByGlobs(locs []file.Location, globs []string) ([]file.Location, error) {
|
|
if len(globs) == 0 {
|
|
return locs, nil
|
|
}
|
|
var filteredLocs []file.Location
|
|
for _, loc := range locs {
|
|
matches, err := locationMatchesGlob(loc, globs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if matches {
|
|
filteredLocs = append(filteredLocs, loc)
|
|
}
|
|
}
|
|
return filteredLocs, nil
|
|
}
|
|
|
|
func locationMatchesGlob(loc file.Location, globs []string) (bool, error) {
|
|
for _, glob := range globs {
|
|
for _, path := range []string{loc.RealPath, loc.AccessPath} {
|
|
if path == "" {
|
|
continue
|
|
}
|
|
matches, err := doublestar.Match(glob, path)
|
|
if err != nil {
|
|
return false, fmt.Errorf("unable to match glob %q to path %q: %w", glob, path, err)
|
|
}
|
|
if matches {
|
|
return true, nil
|
|
}
|
|
}
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) {
|
|
// read the first sector of the file
|
|
buf := make([]byte, 512)
|
|
n, err := reader.ReadAt(buf, 0)
|
|
if err != nil {
|
|
return "", fmt.Errorf("unable to read first sector of file: %w", err)
|
|
}
|
|
if n < 512 {
|
|
return "", fmt.Errorf("unable to read enough bytes to determine executable format")
|
|
}
|
|
|
|
switch {
|
|
case isMacho(buf):
|
|
return file.MachO, nil
|
|
case isPE(buf):
|
|
return file.PE, nil
|
|
case isELF(buf):
|
|
return file.ELF, nil
|
|
}
|
|
|
|
return "", nil
|
|
}
|
|
|
|
func isMacho(by []byte) bool {
|
|
// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44
|
|
|
|
if classOrMachOFat(by) && by[7] < 20 {
|
|
return true
|
|
}
|
|
|
|
if len(by) < 4 {
|
|
return false
|
|
}
|
|
|
|
be := binary.BigEndian.Uint32(by)
|
|
le := binary.LittleEndian.Uint32(by)
|
|
|
|
return be == macho.Magic32 ||
|
|
le == macho.Magic32 ||
|
|
be == macho.Magic64 ||
|
|
le == macho.Magic64
|
|
}
|
|
|
|
// classOrMachOFat reports whether in carries the 0xCAFEBABE magic number, which Java bytecode and
// Mach-O binaries share, and is long enough to tell the two apart.
// More info here https://github.com/threatstack/libmagic/blob/master/magic/Magdir/cafebabe
func classOrMachOFat(in []byte) bool {
	// sourced from https://github.com/gabriel-vasile/mimetype/blob/02af149c0dfd1444d9256fc33c2012bb3153e1d2/internal/magic/binary.go#L44

	// At least 8 bytes are required: the only quick way to distinguish the two formats is the
	// byte at position 7, which callers inspect after this check.
	if len(in) >= 8 {
		cafebabe := []byte{0xCA, 0xFE, 0xBA, 0xBE}
		return bytes.Equal(in[:len(cafebabe)], cafebabe)
	}
	return false
}
|
|
|
|
// isPE reports whether by begins with the two-byte "MZ" signature.
func isPE(by []byte) bool {
	signature := []byte("MZ")
	return len(by) >= len(signature) && bytes.Equal(by[:len(signature)], signature)
}
|
|
|
|
// isELF reports whether by begins with the ELF magic number (elf.ELFMAG).
func isELF(by []byte) bool {
	magic := []byte(elf.ELFMAG)
	return len(by) >= len(magic) && bytes.Equal(by[:len(magic)], magic)
}
|