diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index e365b7416..ff491b66b 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -16,8 +16,6 @@ import ( "github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/cataloging/filecataloging" "github.com/anchore/syft/syft/cataloging/pkgcataloging" - "github.com/anchore/syft/syft/file/cataloger/executable" - "github.com/anchore/syft/syft/file/cataloger/filecontent" "github.com/anchore/syft/syft/pkg/cataloger/binary" "github.com/anchore/syft/syft/pkg/cataloger/dotnet" "github.com/anchore/syft/syft/pkg/cataloger/golang" @@ -142,18 +140,14 @@ func (cfg Catalog) ToFilesConfig() filecataloging.Config { log.WithFields("error", err).Warn("unable to configure file hashers") } - return filecataloging.Config{ - Selection: cfg.File.Metadata.Selection, - Hashers: hashers, - Content: filecontent.Config{ - Globs: cfg.File.Content.Globs, - SkipFilesAboveSize: cfg.File.Content.SkipFilesAboveSize, - }, - Executable: executable.Config{ - MIMETypes: executable.DefaultConfig().MIMETypes, - Globs: cfg.File.Executable.Globs, - }, - } + c := filecataloging.DefaultConfig() + c.Selection = cfg.File.Metadata.Selection + c.Hashers = hashers + c.Content.Globs = cfg.File.Content.Globs + c.Content.SkipFilesAboveSize = cfg.File.Content.SkipFilesAboveSize + c.Executable.Globs = cfg.File.Executable.Globs + + return c } func (cfg Catalog) ToLicenseConfig() cataloging.LicenseConfig { diff --git a/syft/file/cataloger/executable/cataloger.go b/syft/file/cataloger/executable/cataloger.go index f6cdb5e65..eb76e1b36 100644 --- a/syft/file/cataloger/executable/cataloger.go +++ b/syft/file/cataloger/executable/cataloger.go @@ -24,9 +24,70 @@ import ( "github.com/anchore/syft/syft/internal/unionreader" ) +type SymbolCaptureScope string + +//type SymbolTypes string + +const ( + SymbolScopeAll SymbolCaptureScope = "all" // any and all binaries + SymbolScopeLibraries 
SymbolCaptureScope = "libraries" // binaries with exported symbols + SymbolScopeApplications SymbolCaptureScope = "applications" // binaries with an entry point + SymbolScopeGolang SymbolCaptureScope = "golang" // only binaries built with the golang toolchain + SymbolScopeNone SymbolCaptureScope = "none" // do not capture any symbols + + //SymbolTypeCode SymbolTypes = "code" + //SymbolTypeData SymbolTypes = "data" +) + type Config struct { + // MIMETypes are the MIME types that will be considered for executable cataloging. MIMETypes []string `json:"mime-types" yaml:"mime-types" mapstructure:"mime-types"` - Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"` + + // Globs are the glob patterns that will be used to filter which files are cataloged. + Globs []string `json:"globs" yaml:"globs" mapstructure:"globs"` + + // Symbols configures symbol extraction settings. + Symbols SymbolConfig `json:"symbols" yaml:"symbols" mapstructure:"symbols"` +} + +// SymbolConfig holds settings related to symbol capturing from executables. +type SymbolConfig struct { + // CaptureScope defines the scope of symbols to capture from executables (all binaries, libraries only, applications only, golang binaries only, or none). + CaptureScope []SymbolCaptureScope `json:"capture" yaml:"capture" mapstructure:"capture"` + + // Go configures Go-specific symbol capturing settings. + Go GoSymbolConfig `json:"go" yaml:"go" mapstructure:"go"` +} + +// GoSymbolConfig holds settings specific to capturing symbols from binaries built with the golang toolchain. +type GoSymbolConfig struct { + // Types are the types of Go symbols to capture, relative to `go tool nm` output (e.g. T, t, R, r, D, d, B, b, C, U, etc). + // If empty, all symbol types are captured. + Types []string + + // StandardLibrary indicates whether to capture Go standard library symbols (e.g. "fmt", "net/http", etc). 
+ StandardLibrary bool `json:"standard-library" yaml:"standard-library" mapstructure:"standard-library"` + + // ExtendedStandardLibrary indicates whether to capture extended Go standard library symbols (e.g. "golang.org/x/net", etc). + ExtendedStandardLibrary bool `json:"extended-standard-library" yaml:"extended-standard-library" mapstructure:"extended-standard-library"` + + // ThirdPartyModules indicates whether to capture third-party module symbols (e.g. github.com/spf13/cobra, etc). + ThirdPartyModules bool `json:"third-party-modules" yaml:"third-party-modules" mapstructure:"third-party-modules"` + + // NormalizeVendoredModules indicates whether to normalize vendored module paths by removing the "vendor/" prefix when capturing third-party module symbols. + NormalizeVendoredModules bool `json:"normalize-vendored-modules" yaml:"normalize-vendored-modules" mapstructure:"normalize-vendored-modules"` + + // TypeEqualityFunctions indicates whether to capture type equality functions (e.g. "type..eq..T1..T2") when capturing Go symbols. These are automatically generated by the Go compiler for generic types. + TypeEqualityFunctions bool `json:"type-equality-functions" yaml:"type-equality-functions" mapstructure:"type-equality-functions"` + + // GCShapeStencils indicates whether to capture GC shape stencil functions (e.g. "go.shape.*") when capturing Go symbols. These are related to how generics are implemented and are not user defined or directly callable. + GCShapeStencils bool `json:"gc-shape-stencils" yaml:"gc-shape-stencils" mapstructure:"gc-shape-stencils"` + + // ExportedSymbols indicates whether to capture only exported (public/global) symbols from Go binaries. + ExportedSymbols bool `json:"exported-symbols" yaml:"exported-symbols" mapstructure:"exported-symbols"` + + // UnexportedSymbols indicates whether to capture unexported (private/local) symbols from Go binaries. 
+ UnexportedSymbols bool `json:"unexported-symbols" yaml:"unexported-symbols" mapstructure:"unexported-symbols"` } type Cataloger struct { @@ -39,6 +100,22 @@ func DefaultConfig() Config { return Config{ MIMETypes: m, Globs: nil, + Symbols: SymbolConfig{ + CaptureScope: []SymbolCaptureScope{ + SymbolScopeGolang, + }, + Go: GoSymbolConfig{ + Types: []string{"T", "t"}, + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + NormalizeVendoredModules: true, + ExportedSymbols: true, + TypeEqualityFunctions: false, // capturing this adds a lot of noise and have arguably little value + GCShapeStencils: false, // capturing this adds a lot of noise and have arguably little value + UnexportedSymbols: false, // vulnerabilities tend to track only exported symbols + }, + }, } } @@ -69,7 +146,7 @@ func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map errs := sync.Collect(&ctx, cataloging.ExecutorFile, sync.ToSeq(locs), func(loc file.Location) (*file.Executable, error) { prog.AtomicStage.Set(loc.Path()) - exec, err := processExecutableLocation(loc, resolver) + exec, err := i.processExecutableLocation(loc, resolver) if err != nil { err = unknown.New(loc, err) } @@ -89,7 +166,7 @@ func (i *Cataloger) CatalogCtx(ctx context.Context, resolver file.Resolver) (map return results, errs } -func processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) { +func (i *Cataloger) processExecutableLocation(loc file.Location, resolver file.Resolver) (*file.Executable, error) { reader, err := resolver.FileContentsByLocation(loc) if err != nil { log.WithFields("error", err, "path", loc.RealPath).Debug("unable to get file contents") @@ -103,7 +180,52 @@ func processExecutableLocation(loc file.Location, resolver file.Resolver) (*file return nil, fmt.Errorf("unable to get union reader: %w", err) } - return processExecutable(loc, uReader) + return i.processExecutable(loc, uReader) +} + +func (i *Cataloger) 
processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) { + data := file.Executable{} + + // determine the executable format + + format, err := findExecutableFormat(reader) + if err != nil { + log.Debugf("unable to determine executable kind for %v: %v", loc.RealPath, err) + return nil, fmt.Errorf("unable to determine executable kind: %w", err) + } + + if format == "" { + // this is not an "unknown", so just log -- this binary does not have parseable data in it + log.Debugf("unable to determine executable format for %q", loc.RealPath) + return nil, nil + } + + data.Format = format + + switch format { + case file.ELF: + if err = findELFFeatures(&data, reader, i.config.Symbols); err != nil { + log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine ELF features") + err = fmt.Errorf("unable to determine ELF features: %w", err) + } + case file.PE: + if err = findPEFeatures(&data, reader); err != nil { + log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features") + err = fmt.Errorf("unable to determine PE features: %w", err) + } + case file.MachO: + if err = findMachoFeatures(&data, reader, i.config.Symbols); err != nil { + log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine Macho features") + err = fmt.Errorf("unable to determine Macho features: %w", err) + } + } + + // always allocate collections for presentation + if data.ImportedLibraries == nil { + data.ImportedLibraries = []string{} + } + + return &data, err } func catalogingProgress(locations int64) *monitor.TaskProgress { @@ -152,51 +274,6 @@ func locationMatchesGlob(loc file.Location, globs []string) (bool, error) { return false, nil } -func processExecutable(loc file.Location, reader unionreader.UnionReader) (*file.Executable, error) { - data := file.Executable{} - - // determine the executable format - - format, err := findExecutableFormat(reader) - if err != nil { - log.Debugf("unable to 
determine executable kind for %v: %v", loc.RealPath, err) - return nil, fmt.Errorf("unable to determine executable kind: %w", err) - } - - if format == "" { - // this is not an "unknown", so just log -- this binary does not have parseable data in it - log.Debugf("unable to determine executable format for %q", loc.RealPath) - return nil, nil - } - - data.Format = format - - switch format { - case file.ELF: - if err = findELFFeatures(&data, reader); err != nil { - log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine ELF features") - err = fmt.Errorf("unable to determine ELF features: %w", err) - } - case file.PE: - if err = findPEFeatures(&data, reader); err != nil { - log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features") - err = fmt.Errorf("unable to determine PE features: %w", err) - } - case file.MachO: - if err = findMachoFeatures(&data, reader); err != nil { - log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine Macho features") - err = fmt.Errorf("unable to determine Macho features: %w", err) - } - } - - // always allocate collections for presentation - if data.ImportedLibraries == nil { - data.ImportedLibraries = []string{} - } - - return &data, err -} - func findExecutableFormat(reader unionreader.UnionReader) (file.ExecutableFormat, error) { // read the first sector of the file buf := make([]byte, 512) diff --git a/syft/file/cataloger/executable/elf.go b/syft/file/cataloger/executable/elf.go index b9d2205cf..4aebd67fc 100644 --- a/syft/file/cataloger/executable/elf.go +++ b/syft/file/cataloger/executable/elf.go @@ -1,7 +1,9 @@ package executable import ( + "debug/buildinfo" "debug/elf" + "io" "regexp" "strings" @@ -13,7 +15,7 @@ import ( "github.com/anchore/syft/syft/internal/unionreader" ) -func findELFFeatures(data *file.Executable, reader unionreader.UnionReader) error { +func findELFFeatures(data *file.Executable, reader unionreader.UnionReader, cfg SymbolConfig) 
error { f, err := elf.NewFile(reader) if err != nil { return err @@ -34,10 +36,176 @@ func findELFFeatures(data *file.Executable, reader unionreader.UnionReader) erro data.ELFSecurityFeatures = findELFSecurityFeatures(f) data.HasEntrypoint = elfHasEntrypoint(f) data.HasExports = elfHasExports(f) + data.Toolchains = elfToolchains(reader, f) + if shouldCaptureSymbols(data, cfg) { + data.SymbolNames = elfNMSymbols(f, cfg, data.Toolchains) + } return err } +func elfToolchains(reader unionreader.UnionReader, f *elf.File) []file.Toolchain { + return includeNoneNil( + golangToolchainEvidence(reader), + ) +} + +func shouldCaptureSymbols(data *file.Executable, cfg SymbolConfig) bool { + // TODO: IMPLEMENT ME! + return true +} + +// elfGolangToolchainEvidence attempts to extract Go toolchain information from the ELF file. +func golangToolchainEvidence(reader io.ReaderAt) *file.Toolchain { + bi, err := buildinfo.Read(reader) + if err != nil || bi == nil { + // not a golang binary + return nil + } + return &file.Toolchain{ + Name: "go", + Version: bi.GoVersion, + Kind: file.ToolchainKindCompiler, + } +} + +func includeNoneNil(evidence ...*file.Toolchain) []file.Toolchain { + var toolchains []file.Toolchain + for _, e := range evidence { + if e != nil { + toolchains = append(toolchains, *e) + } + } + return toolchains +} + +func elfNMSymbols(f *elf.File, cfg SymbolConfig, toolchains []file.Toolchain) []string { + if isGoToolchainPresent(toolchains) { + return captureElfGoSymbols(f, cfg) + } + + // TODO: capture other symbol types (non-go) based on the scope selection (lib, app, etc) + return nil +} + +func captureElfGoSymbols(f *elf.File, cfg SymbolConfig) []string { + syms, err := f.Symbols() + if err != nil { + log.WithFields("error", err).Trace("unable to read symbols from elf file") + return nil + } + + var symbols []string + filter := createGoSymbolFilter(cfg.Go) + for _, sym := range syms { + name, include := filter(sym.Name, elfSymbolType(sym, f.Sections)) + if include { 
+ symbols = append(symbols, name) + } + } + return symbols +} + +// elfSymbolType returns the nm-style single character representing the symbol type. +// This mimics the output of `nm` for ELF binaries. +func elfSymbolType(sym elf.Symbol, sections []*elf.Section) string { + binding := elf.ST_BIND(sym.Info) + symType := elf.ST_TYPE(sym.Info) + + // handle special section indices first + switch sym.Section { + case elf.SHN_UNDEF: + // undefined symbols + if binding == elf.STB_WEAK { + if symType == elf.STT_OBJECT { + return "v" // weak object + } + return "w" // weak symbol + } + return "U" // undefined (always uppercase) + + case elf.SHN_ABS: + // absolute symbols + if binding == elf.STB_LOCAL { + return "a" + } + return "A" + + case elf.SHN_COMMON: + // common symbols (uninitialized data) + return "C" // always uppercase per nm convention + } + + // for defined symbols, determine type based on section characteristics + typeChar := elfSectionTypeChar(sym.Section, sections) + + // handle weak symbols + if binding == elf.STB_WEAK { + if typeChar == 'U' || typeChar == 'u' { + if symType == elf.STT_OBJECT { + return "v" + } + return "w" + } + // weak defined symbol + if binding == elf.STB_LOCAL { + return strings.ToLower(string(typeChar)) + } + // use 'W' for weak defined, or 'V' for weak object + if symType == elf.STT_OBJECT { + return "V" + } + return "W" + } + + // local symbols are lowercase, global symbols are uppercase + if binding == elf.STB_LOCAL { + return strings.ToLower(string(typeChar)) + } + return string(typeChar) +} + +// elfSectionTypeChar returns the nm-style character based on section flags and type. +func elfSectionTypeChar(sectIdx elf.SectionIndex, sections []*elf.Section) byte { + idx := int(sectIdx) + // the sections slice from debug/elf includes the NULL section at index 0, so we use idx directly + if idx < 0 || idx >= len(sections) { + return '?' 
+ } + + section := sections[idx] + flags := section.Flags + stype := section.Type + + // check section characteristics to determine symbol type + switch { + case flags&elf.SHF_EXECINSTR != 0: + // executable section -> text + return 'T' + + case stype == elf.SHT_NOBITS: + // uninitialized data section -> BSS + return 'B' + + case flags&elf.SHF_WRITE == 0 && flags&elf.SHF_ALLOC != 0: + // read-only allocated section -> rodata + return 'R' + + case flags&elf.SHF_WRITE != 0 && flags&elf.SHF_ALLOC != 0: + // writable allocated section -> data + return 'D' + + case flags&elf.SHF_ALLOC != 0: + // other allocated section + return 'D' + + default: + // non-allocated sections (debug info, etc.) + // nm typically shows 'n' for debug, but we'll use 'N' for consistency + return 'N' + } +} + func findELFSecurityFeatures(f *elf.File) *file.ELFSecurityFeatures { return &file.ELFSecurityFeatures{ SymbolTableStripped: isElfSymbolTableStripped(f), diff --git a/syft/file/cataloger/executable/elf_test.go b/syft/file/cataloger/executable/elf_test.go index fd3536cca..c846a5da4 100644 --- a/syft/file/cataloger/executable/elf_test.go +++ b/syft/file/cataloger/executable/elf_test.go @@ -226,3 +226,330 @@ func Test_elfHasExports(t *testing.T) { }) } } + +func Test_elfNMSymbols_nonGoReturnsNil(t *testing.T) { + // for non-Go binaries, elfNMSymbols should return nil since we only support Go for now + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/shared-info", fixture)) + require.NoError(t, err) + return f + } + + f, err := elf.NewFile(readerForFixture(t, "bin/hello_linux")) + require.NoError(t, err) + + // no Go toolchain present + toolchains := []file.Toolchain{} + cfg := SymbolConfig{} + + symbols := elfNMSymbols(f, cfg, toolchains) + assert.Nil(t, symbols, "expected nil symbols for non-Go binary") +} + +func Test_elfGoToolchainDetection(t *testing.T) { + readerForFixture := func(t *testing.T, 
fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + wantPresent bool + }{ + { + name: "go binary has toolchain", + fixture: "bin/hello_linux", + wantPresent: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + f, err := elf.NewFile(reader) + require.NoError(t, err) + + toolchains := elfToolchains(reader, f) + assert.Equal(t, tt.wantPresent, isGoToolchainPresent(toolchains)) + + if tt.wantPresent { + require.NotEmpty(t, toolchains) + assert.Equal(t, "go", toolchains[0].Name) + assert.NotEmpty(t, toolchains[0].Version) + assert.Equal(t, file.ToolchainKindCompiler, toolchains[0].Kind) + } + }) + } +} + +func Test_elfGoSymbolCapture(t *testing.T) { + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + cfg GoSymbolConfig + wantSymbols []string // exact symbol names that must be present + wantMinSymbolCount int + }{ + { + name: "capture all symbol types", + fixture: "bin/hello_linux", + cfg: GoSymbolConfig{ + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + // stdlib - fmt package (used via fmt.Println) + "fmt.(*fmt).fmtInteger", + "fmt.(*pp).doPrintf", + // stdlib - strings package (used via strings.ToUpper) + "strings.ToUpper", + "strings.Map", + // stdlib - encoding/json package (used via json.Marshal) + "encoding/json.Marshal", + // extended stdlib - golang.org/x/text (used via language.English) + "golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Language.String", + // 
third-party - go-spew (used via spew.Dump) + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.fdump", + }, + wantMinSymbolCount: 50, + }, + { + name: "capture only third-party symbols", + fixture: "bin/hello_linux", + cfg: GoSymbolConfig{ + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.(*formatState).Format", + "github.com/davecgh/go-spew/spew.fdump", + }, + }, + { + name: "capture only extended stdlib symbols", + fixture: "bin/hello_linux", + cfg: GoSymbolConfig{ + ExtendedStandardLibrary: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Parse", + }, + }, + { + name: "capture with text section types only", + fixture: "bin/hello_linux", + cfg: GoSymbolConfig{ + Types: []string{"T", "t"}, // text section (code) symbols + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "encoding/json.Marshal", + "strings.ToUpper", + }, + wantMinSymbolCount: 10, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + f, err := elf.NewFile(reader) + require.NoError(t, err) + + symbols := captureElfGoSymbols(f, SymbolConfig{Go: tt.cfg}) + symbolSet := make(map[string]struct{}, len(symbols)) + for _, sym := range symbols { + symbolSet[sym] = struct{}{} + } + + if tt.wantMinSymbolCount > 0 { + assert.GreaterOrEqual(t, len(symbols), tt.wantMinSymbolCount, + "expected at least %d symbols, got %d", tt.wantMinSymbolCount, len(symbols)) + } + + for _, want := range tt.wantSymbols { + _, found := symbolSet[want] + assert.True(t, found, "expected symbol %q to be present", want) + } + }) + } +} + 
+func Test_elfNMSymbols_goReturnsSymbols(t *testing.T) { + // for Go binaries, elfNMSymbols should return symbols when Go toolchain is present + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + reader := readerForFixture(t, "bin/hello_linux") + f, err := elf.NewFile(reader) + require.NoError(t, err) + + toolchains := []file.Toolchain{ + {Name: "go", Version: "1.24", Kind: file.ToolchainKindCompiler}, + } + cfg := SymbolConfig{ + Go: GoSymbolConfig{ + Types: []string{"T", "t"}, + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + }, + } + + symbols := elfNMSymbols(f, cfg, toolchains) + assert.NotNil(t, symbols, "expected symbols for Go binary") + assert.NotEmpty(t, symbols, "expected non-empty symbols for Go binary") +} + +func Test_elfSymbolType(t *testing.T) { + tests := []struct { + name string + sym elf.Symbol + sections []*elf.Section + want string + }{ + { + name: "undefined symbol", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_NOTYPE), + Section: elf.SHN_UNDEF, + }, + want: "U", + }, + { + name: "absolute symbol global", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_NOTYPE), + Section: elf.SHN_ABS, + }, + want: "A", + }, + { + name: "absolute symbol local", + sym: elf.Symbol{ + Info: byte(elf.STB_LOCAL)<<4 | byte(elf.STT_NOTYPE), + Section: elf.SHN_ABS, + }, + want: "a", + }, + { + name: "common symbol", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_OBJECT), + Section: elf.SHN_COMMON, + }, + want: "C", + }, + { + name: "weak undefined symbol", + sym: elf.Symbol{ + Info: byte(elf.STB_WEAK)<<4 | byte(elf.STT_NOTYPE), + Section: elf.SHN_UNDEF, + }, + want: "w", + }, + { + name: "weak undefined object", + sym: elf.Symbol{ + Info: byte(elf.STB_WEAK)<<4 | byte(elf.STT_OBJECT), + Section: 
elf.SHN_UNDEF, + }, + want: "v", + }, + { + name: "text section global", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_FUNC), + Section: 1, + }, + sections: []*elf.Section{ + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NULL}}, // index 0: NULL section + {SectionHeader: elf.SectionHeader{Flags: elf.SHF_ALLOC | elf.SHF_EXECINSTR}}, // index 1: .text + }, + want: "T", + }, + { + name: "text section local", + sym: elf.Symbol{ + Info: byte(elf.STB_LOCAL)<<4 | byte(elf.STT_FUNC), + Section: 1, + }, + sections: []*elf.Section{ + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NULL}}, // index 0: NULL section + {SectionHeader: elf.SectionHeader{Flags: elf.SHF_ALLOC | elf.SHF_EXECINSTR}}, // index 1: .text + }, + want: "t", + }, + { + name: "data section global", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_OBJECT), + Section: 1, + }, + sections: []*elf.Section{ + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NULL}}, // index 0: NULL section + {SectionHeader: elf.SectionHeader{Flags: elf.SHF_ALLOC | elf.SHF_WRITE}}, // index 1: .data + }, + want: "D", + }, + { + name: "bss section global", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_OBJECT), + Section: 1, + }, + sections: []*elf.Section{ + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NULL}}, // index 0: NULL section + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NOBITS, Flags: elf.SHF_ALLOC | elf.SHF_WRITE}}, // index 1: .bss + }, + want: "B", + }, + { + name: "rodata section global", + sym: elf.Symbol{ + Info: byte(elf.STB_GLOBAL)<<4 | byte(elf.STT_OBJECT), + Section: 1, + }, + sections: []*elf.Section{ + {SectionHeader: elf.SectionHeader{Type: elf.SHT_NULL}}, // index 0: NULL section + {SectionHeader: elf.SectionHeader{Flags: elf.SHF_ALLOC}}, // index 1: .rodata (no write flag = read-only) + }, + want: "R", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := elfSymbolType(tt.sym, tt.sections) + assert.Equal(t, 
tt.want, got) + }) + } +} diff --git a/syft/file/cataloger/executable/go_symbols.go b/syft/file/cataloger/executable/go_symbols.go new file mode 100644 index 000000000..dfd973d6d --- /dev/null +++ b/syft/file/cataloger/executable/go_symbols.go @@ -0,0 +1,201 @@ +package executable + +import ( + "strings" + "unicode" + "unicode/utf8" + + "github.com/scylladb/go-set/strset" +) + +var goNMTypes = []string{ + "T", // text (code) segment symbol + "t", // static text segment symbol + "R", // read-only data segment symbol + "r", // static read-only data segment symbol + "D", // data segment symbol + "d", // static data segment symbol + "B", // bss segment symbol + "b", // static bss segment symbol + "C", // constant address + "U", // referenced but undefined symbol +} + +const ( + vendorPrefix = "vendor/" + extendedStdlibPrefix = "golang.org/x/" + typeEqualityPrefix = "type:.eq." + gcShapeStencilPrefix = "go.shape." +) + +// createGoSymbolFilter creates a filter function for Go symbols based on the provided configuration. This filter function +// returns true if a symbol should be included based on its name and type. This also allows for modification of the symbol name +// if necessary (e.g., normalization of vendored module paths). The returned name is only valid if the boolean is true. 
+func createGoSymbolFilter(cfg GoSymbolConfig) func(string, string) (string, bool) { + validNmTypes := buildNmTypes(cfg.Types) + + return func(symName, symType string) (string, bool) { + // check if this is a valid type + if !validNmTypes.Has(symType) { + return "", false + } + + // filter out floating point literals and other compiler literals (e.g., $f64.3fceb851eb851eb8) + if isCompilerLiteral(symName) { + return "", false + } + + // filter based on exported/unexported symbol configuration + exported := isExportedSymbol(symName) + if !shouldIncludeByExportStatus(exported, cfg.ExportedSymbols, cfg.UnexportedSymbols) { + return "", false + } + + // handle type equality functions (e.g., type:.eq.myStruct) + if isTypeEqualityFunction(symName) { + if !cfg.TypeEqualityFunctions { + return "", false + } + return symName, true + } + + // handle GC shape stencil functions (e.g., go.shape.func()) + if isGCShapeStencil(symName) { + if !cfg.GCShapeStencils { + return "", false + } + return symName, true + } + + // normalize vendored module paths if configured + symName = normalizeVendoredPath(symName, cfg.NormalizeVendoredModules) + + // determine the package path for classification + pkgPath := extractPackagePath(symName) + + // handle extended stdlib (golang.org/x/*) + if isExtendedStdlib(pkgPath) { + if !cfg.ExtendedStandardLibrary { + return "", false + } + return symName, true + } + + // handle stdlib packages + if isStdlibPackage(pkgPath) { + if !cfg.StandardLibrary { + return "", false + } + return symName, true + } + + // this is a third-party package + if !cfg.ThirdPartyModules { + return "", false + } + return symName, true + } +} + +// buildNmTypes creates a set of valid NM types from the configuration. +// If no types are specified, all default types are used. +func buildNmTypes(types []string) *strset.Set { + tys := strset.New(types...) + if tys.Size() == 0 { + return strset.New(goNMTypes...) + } + + // only allow valid nm types to continue... 
+ return strset.Intersection(strset.New(goNMTypes...), tys) +} + +// isCompilerLiteral checks if a symbol is literal symbol inserted by the compiler. +// This includes floating point literals, int constants, and others. +// These have the format: $f64.3fceb851eb851eb8 or $f32.3f800000 +// where the hex represents the IEEE 754 representation of the value. +func isCompilerLiteral(symName string) bool { + return strings.HasPrefix(symName, "$") +} + +// shouldIncludeByExportStatus determines if a symbol should be included based on its +// export status and the configuration settings for exported/unexported symbols. +func shouldIncludeByExportStatus(exported, includeExported, includeUnexported bool) bool { + if exported && !includeExported { + return false + } + if !exported && !includeUnexported { + return false + } + return true +} + +// isTypeEqualityFunction checks if a symbol is a compiler-generated type equality function. +// These are automatically generated by the Go compiler for generic types +// and have the format: type:.eq.TypeName +func isTypeEqualityFunction(symName string) bool { + return strings.HasPrefix(symName, typeEqualityPrefix) +} + +// isGCShapeStencil checks if a symbol is a GC shape stencil function. +// These are related to how generics are implemented and are not user defined or directly callable. +// They can appear as a prefix (e.g., "go.shape.func()") or within generic type parameters +// (e.g., "slices.partitionCmpFunc[go.shape.struct { ... }]"). +func isGCShapeStencil(symName string) bool { + // check for prefix: go.shape.* + if strings.HasPrefix(symName, gcShapeStencilPrefix) { + return true + } + // check for embedded in generic type parameter: [go.shape.* + return strings.Contains(symName, "["+gcShapeStencilPrefix) +} + +// normalizeVendoredPath removes the "vendor/" prefix from vendored module paths if normalization is enabled. 
+func normalizeVendoredPath(symName string, normalize bool) string { + if normalize && strings.HasPrefix(symName, vendorPrefix) { + return strings.TrimPrefix(symName, vendorPrefix) + } + return symName +} + +// isVendoredPath checks if a symbol name represents a vendored module path. +func isVendoredPath(symName string) bool { + return strings.HasPrefix(symName, vendorPrefix) +} + +// isExtendedStdlib checks if a package path is part of the Go extended standard library (golang.org/x/*). +func isExtendedStdlib(pkgPath string) bool { + return strings.HasPrefix(pkgPath, extendedStdlibPrefix) +} + +// extractPackagePath extracts the package import path from a symbol name. +// For example, "github.com/foo/bar.Baz" returns "github.com/foo/bar". +func extractPackagePath(symName string) string { + lastDot := strings.LastIndex(symName, ".") + if lastDot == -1 { + return symName + } + return symName[:lastDot] +} + +// isExportedSymbol checks if a symbol is exported (public) by examining if the first +// character of the symbol name (after the last '.') is uppercase. +func isExportedSymbol(symName string) bool { + lastDot := strings.LastIndex(symName, ".") + if lastDot == -1 || lastDot >= len(symName)-1 { + return false + } + firstRune, _ := utf8.DecodeRuneInString(symName[lastDot+1:]) + return unicode.IsUpper(firstRune) +} + +// isStdlibPackage determines if a package path represents a Go standard library package. +// Stdlib packages don't contain a '.' in their path (they use simple names like "fmt", "net/http"). +// Third-party packages start with a domain containing a '.' (e.g., "github.com", "golang.org"). 
+func isStdlibPackage(pkgPath string) bool { + // the "main" package is treated as stdlib for our purposes + if pkgPath == "main" { + return true + } + // stdlib packages don't contain dots in their import path + return !strings.Contains(pkgPath, ".") +} diff --git a/syft/file/cataloger/executable/go_symbols_test.go b/syft/file/cataloger/executable/go_symbols_test.go new file mode 100644 index 000000000..c7a1a6e63 --- /dev/null +++ b/syft/file/cataloger/executable/go_symbols_test.go @@ -0,0 +1,941 @@ +package executable + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_buildNmTypes(t *testing.T) { + tests := []struct { + name string + types []string + wantSize int + contains []string + }{ + { + name: "empty types uses defaults", + types: nil, + wantSize: len(goNMTypes), + contains: []string{"T", "t", "R", "r", "D", "d", "B", "b", "C", "U"}, + }, + { + name: "custom types", + types: []string{"T", "t"}, + wantSize: 2, + contains: []string{"T", "t"}, + }, + { + name: "invalid types", + types: []string{"T", "t", "m", ",", "thing!"}, + wantSize: 2, + contains: []string{"T", "t"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := buildNmTypes(tt.types) + assert.Equal(t, tt.wantSize, got.Size()) + for _, c := range tt.contains { + assert.True(t, got.Has(c), "expected set to contain %q", c) + } + }) + } +} + +func Test_isCompilerLiteral(t *testing.T) { + tests := []struct { + name string + symName string + want bool + }{ + { + name: "64-bit float literal", + symName: "$f64.3fceb851eb851eb8", + want: true, + }, + { + name: "32-bit float literal", + symName: "$f32.3f800000", + want: true, + }, + { + name: "other dollar prefix", + symName: "$something", + want: true, + }, + { + name: "regular symbol", + symName: "main.main", + want: false, + }, + { + name: "empty string", + symName: "", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t 
*testing.T) { + got := isCompilerLiteral(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_shouldIncludeByExportStatus(t *testing.T) { + tests := []struct { + name string + exported bool + includeExported bool + includeUnexported bool + want bool + }{ + { + name: "exported symbol with both enabled", + exported: true, + includeExported: true, + includeUnexported: true, + want: true, + }, + { + name: "unexported symbol with both enabled", + exported: false, + includeExported: true, + includeUnexported: true, + want: true, + }, + { + name: "exported symbol with only exported enabled", + exported: true, + includeExported: true, + includeUnexported: false, + want: true, + }, + { + name: "unexported symbol with only exported enabled", + exported: false, + includeExported: true, + includeUnexported: false, + want: false, + }, + { + name: "exported symbol with only unexported enabled", + exported: true, + includeExported: false, + includeUnexported: true, + want: false, + }, + { + name: "unexported symbol with only unexported enabled", + exported: false, + includeExported: false, + includeUnexported: true, + want: true, + }, + { + name: "exported symbol with both disabled", + exported: true, + includeExported: false, + includeUnexported: false, + want: false, + }, + { + name: "unexported symbol with both disabled", + exported: false, + includeExported: false, + includeUnexported: false, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := shouldIncludeByExportStatus(tt.exported, tt.includeExported, tt.includeUnexported) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isTypeEqualityFunction(t *testing.T) { + tests := []struct { + name string + symName string + want bool + }{ + { + name: "type equality function", + symName: "type:.eq.myStruct", + want: true, + }, + { + name: "type equality with package", + symName: "type:.eq.main.MyType", + want: true, + }, + { + name: "regular function", + symName: 
"main.main", + want: false, + }, + { + name: "similar but not type equality", + symName: "mytype:.eq.something", + want: false, + }, + { + name: "empty string", + symName: "", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isTypeEqualityFunction(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isGCShapeStencil(t *testing.T) { + tests := []struct { + name string + symName string + want bool + }{ + { + name: "gc shape stencil function prefix", + symName: "go.shape.func()", + want: true, + }, + { + name: "gc shape with type prefix", + symName: "go.shape.int", + want: true, + }, + { + name: "gc shape in generic type parameter - struct", + symName: `slices.partitionCmpFunc[go.shape.struct { Key string "json:\"key,omitempty\""; Value go.opentelemetry.io/otel/trace/internal/telemetry.Value "json:\"value,omitempty\"" }]`, + want: true, + }, + { + name: "gc shape in generic type parameter - interface", + symName: "slices.pdqsortCmpFunc[go.shape.interface { Info() (io/fs.FileInfo, error); IsDir() bool; Name() string; Type() io/fs.FileMode }]", + want: true, + }, + { + name: "gc shape in generic - syft location", + symName: `slices.partitionCmpFunc[go.shape.struct { github.com/anchore/syft/syft/file.LocationData "cyclonedx:\"\""; github.com/anchore/syft/syft/file.LocationMetadata "cyclonedx:\"\"" }]`, + want: true, + }, + { + name: "gc shape in generic - rotate", + symName: "slices.rotateCmpFunc[go.shape.struct { Key go.opentelemetry.io/otel/attribute.Key; Value go.opentelemetry.io/otel/attribute.Value }]", + want: true, + }, + { + name: "regular function", + symName: "main.main", + want: false, + }, + { + name: "go package but not shape", + symName: "go.string.something", + want: false, + }, + { + name: "generic without go.shape", + symName: "slices.Sort[int]", + want: false, + }, + { + name: "go.shape in comment or string would not match", + symName: "mypackage.FuncWithComment", + want: false, + }, + 
{ + name: "empty string", + symName: "", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isGCShapeStencil(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_normalizeVendoredPath(t *testing.T) { + tests := []struct { + name string + symName string + normalize bool + want string + }{ + { + name: "vendored path with normalization enabled", + symName: "vendor/github.com/foo/bar.Baz", + normalize: true, + want: "github.com/foo/bar.Baz", + }, + { + name: "vendored path with normalization disabled", + symName: "vendor/github.com/foo/bar.Baz", + normalize: false, + want: "vendor/github.com/foo/bar.Baz", + }, + { + name: "non-vendored path with normalization enabled", + symName: "github.com/foo/bar.Baz", + normalize: true, + want: "github.com/foo/bar.Baz", + }, + { + name: "non-vendored path with normalization disabled", + symName: "github.com/foo/bar.Baz", + normalize: false, + want: "github.com/foo/bar.Baz", + }, + { + name: "stdlib path with normalization enabled", + symName: "fmt.Println", + normalize: true, + want: "fmt.Println", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeVendoredPath(tt.symName, tt.normalize) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isVendoredPath(t *testing.T) { + tests := []struct { + name string + symName string + want bool + }{ + { + name: "vendored third-party", + symName: "vendor/github.com/foo/bar.Baz", + want: true, + }, + { + name: "non-vendored third-party", + symName: "github.com/foo/bar.Baz", + want: false, + }, + { + name: "stdlib", + symName: "fmt.Println", + want: false, + }, + { + name: "main package", + symName: "main.main", + want: false, + }, + { + name: "empty string", + symName: "", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isVendoredPath(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isExtendedStdlib(t 
*testing.T) { + tests := []struct { + name string + pkgPath string + want bool + }{ + { + name: "golang.org/x/net", + pkgPath: "golang.org/x/net", + want: true, + }, + { + name: "golang.org/x/text/encoding", + pkgPath: "golang.org/x/text/encoding", + want: true, + }, + { + name: "golang.org/x/sys/unix", + pkgPath: "golang.org/x/sys/unix", + want: true, + }, + { + name: "regular golang.org package", + pkgPath: "golang.org/protobuf", + want: false, + }, + { + name: "github package", + pkgPath: "github.com/foo/bar", + want: false, + }, + { + name: "stdlib", + pkgPath: "fmt", + want: false, + }, + { + name: "empty string", + pkgPath: "", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isExtendedStdlib(tt.pkgPath) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_extractPackagePath(t *testing.T) { + tests := []struct { + name string + symName string + want string + }{ + { + name: "simple package", + symName: "fmt.Println", + want: "fmt", + }, + { + name: "nested stdlib package", + symName: "net/http.ListenAndServe", + want: "net/http", + }, + { + name: "third-party package", + symName: "github.com/foo/bar.Baz", + want: "github.com/foo/bar", + }, + { + name: "deep third-party package", + symName: "github.com/foo/bar/pkg/util.Helper", + want: "github.com/foo/bar/pkg/util", + }, + { + name: "main package", + symName: "main.main", + want: "main", + }, + { + name: "no dot (just package name)", + symName: "fmt", + want: "fmt", + }, + { + name: "empty string", + symName: "", + want: "", + }, + { + name: "method with receiver", + symName: "github.com/foo/bar.(*MyType).Method", + want: "github.com/foo/bar.(*MyType)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractPackagePath(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isExportedSymbol(t *testing.T) { + tests := []struct { + name string + symName string + want bool + }{ + { + name: "exported 
function", + symName: "fmt.Println", + want: true, + }, + { + name: "unexported function", + symName: "fmt.println", + want: false, + }, + { + name: "exported in main", + symName: "main.Main", + want: true, + }, + { + name: "unexported main", + symName: "main.main", + want: false, + }, + { + name: "exported third-party", + symName: "github.com/foo/bar.Export", + want: true, + }, + { + name: "unexported third-party", + symName: "github.com/foo/bar.private", + want: false, + }, + { + name: "unicode uppercase", + symName: "main.Über", + want: true, + }, + { + name: "unicode lowercase", + symName: "main.über", + want: false, + }, + { + name: "no dot", + symName: "nodot", + want: false, + }, + { + name: "empty string", + symName: "", + want: false, + }, + { + name: "dot at end", + symName: "main.", + want: false, + }, + { + name: "underscore start (unexported)", + symName: "main._private", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isExportedSymbol(tt.symName) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isStdlibPackage(t *testing.T) { + tests := []struct { + name string + pkgPath string + want bool + }{ + { + name: "fmt", + pkgPath: "fmt", + want: true, + }, + { + name: "net/http", + pkgPath: "net/http", + want: true, + }, + { + name: "crypto/sha256", + pkgPath: "crypto/sha256", + want: true, + }, + { + name: "main", + pkgPath: "main", + want: true, + }, + { + name: "runtime", + pkgPath: "runtime", + want: true, + }, + { + name: "github.com third-party", + pkgPath: "github.com/foo/bar", + want: false, + }, + { + name: "golang.org/x extended stdlib", + pkgPath: "golang.org/x/net", + want: false, + }, + { + name: "gopkg.in third-party", + pkgPath: "gopkg.in/yaml.v3", + want: false, + }, + { + name: "empty string", + pkgPath: "", + want: true, // no dots means stdlib by our heuristic + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isStdlibPackage(tt.pkgPath) + 
assert.Equal(t, tt.want, got) + }) + } +} + +func Test_createGoSymbolFilter(t *testing.T) { + tests := []struct { + name string + cfg GoSymbolConfig + symName string + symType string + wantName string + keep bool + }{ + // NM type filtering + { + name: "valid NM type with defaults", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "T", + wantName: "fmt.Println", + keep: true, + }, + { + name: "invalid NM type with defaults", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "X", // important! + wantName: "", + keep: false, + }, + { + name: "custom NM types - included", + cfg: GoSymbolConfig{ + Types: []string{"T"}, + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "T", + wantName: "fmt.Println", + keep: true, + }, + { + name: "custom NM types - excluded", + cfg: GoSymbolConfig{ + Types: []string{"T"}, + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "t", + wantName: "", + keep: false, + }, + + // floating point literal filtering + { + name: "floating point literal filtered", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "$f64.3fceb851eb851eb8", + symType: "R", + wantName: "", + keep: false, + }, + + // export status filtering + { + name: "exported symbol with only exported enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: false, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "T", + wantName: "fmt.Println", + keep: true, + }, + { + name: "unexported symbol with only exported enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: false, + StandardLibrary: true, + }, + symName: "fmt.println", + symType: "T", + 
wantName: "", + keep: false, + }, + + // type equality functions + { + name: "type equality function - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + TypeEqualityFunctions: true, + }, + symName: "type:.eq.myStruct", + symType: "T", + wantName: "type:.eq.myStruct", + keep: true, + }, + { + name: "type equality function - disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + TypeEqualityFunctions: false, + }, + symName: "type:.eq.myStruct", + symType: "T", + wantName: "", + keep: false, + }, + + // GC shape stencils + { + name: "gc shape stencil - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + GCShapeStencils: true, + }, + symName: "go.shape.func()", + symType: "T", + wantName: "go.shape.func()", + keep: true, + }, + { + name: "gc shape stencil - disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + GCShapeStencils: false, + }, + symName: "go.shape.func()", + symType: "T", + wantName: "", + keep: false, + }, + { + name: "gc shape stencil embedded in generic - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + GCShapeStencils: true, + }, + symName: "slices.partitionCmpFunc[go.shape.struct { Key string; Value int }]", + symType: "T", + wantName: "slices.partitionCmpFunc[go.shape.struct { Key string; Value int }]", + keep: true, + }, + { + name: "gc shape stencil embedded in generic - disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + GCShapeStencils: false, + }, + symName: "slices.partitionCmpFunc[go.shape.struct { Key string; Value int }]", + symType: "T", + wantName: "", + keep: false, + }, + + // vendored module normalization + { + name: "vendored path - normalization enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ThirdPartyModules: true, + NormalizeVendoredModules: true, + }, + symName: 
"vendor/github.com/foo/bar.Baz", + symType: "T", + wantName: "github.com/foo/bar.Baz", + keep: true, + }, + { + name: "vendored path - normalization disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ThirdPartyModules: true, + NormalizeVendoredModules: false, + }, + symName: "vendor/github.com/foo/bar.Baz", + symType: "T", + wantName: "vendor/github.com/foo/bar.Baz", + keep: true, + }, + + // extended stdlib + { + name: "extended stdlib - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ExtendedStandardLibrary: true, + }, + symName: "golang.org/x/net/html.Parse", + symType: "T", + wantName: "golang.org/x/net/html.Parse", + keep: true, + }, + { + name: "extended stdlib - disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ExtendedStandardLibrary: false, + }, + symName: "golang.org/x/net/html.Parse", + symType: "T", + wantName: "", + keep: false, + }, + + // stdlib + { + name: "stdlib - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "fmt.Println", + symType: "T", + wantName: "fmt.Println", + keep: true, + }, + { + name: "stdlib - disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: false, + }, + symName: "fmt.Println", + symType: "T", + wantName: "", + keep: false, + }, + { + name: "nested stdlib - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "net/http.ListenAndServe", + symType: "T", + wantName: "net/http.ListenAndServe", + keep: true, + }, + + // third party + { + name: "third party - enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ThirdPartyModules: true, + }, + symName: "github.com/spf13/cobra.Command", + symType: "T", + wantName: "github.com/spf13/cobra.Command", + keep: true, + }, + { + name: "third party 
- disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + ThirdPartyModules: false, + }, + symName: "github.com/spf13/cobra.Command", + symType: "T", + wantName: "", + keep: false, + }, + + // main package (treated as stdlib) + { + name: "main package - stdlib enabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: true, + }, + symName: "main.main", + symType: "T", + wantName: "main.main", + keep: true, + }, + { + name: "main package - stdlib disabled", + cfg: GoSymbolConfig{ + ExportedSymbols: true, + UnexportedSymbols: true, + StandardLibrary: false, + }, + symName: "main.main", + symType: "T", + wantName: "", + keep: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filter := createGoSymbolFilter(tt.cfg) + require.NotNil(t, filter) + + gotName, gotKeep := filter(tt.symName, tt.symType) + assert.Equal(t, tt.keep, gotKeep) + if gotKeep { + assert.Equal(t, tt.wantName, gotName) + } + }) + } +} diff --git a/syft/file/cataloger/executable/macho.go b/syft/file/cataloger/executable/macho.go index 7cd80f7e5..179a9ed0d 100644 --- a/syft/file/cataloger/executable/macho.go +++ b/syft/file/cataloger/executable/macho.go @@ -10,14 +10,24 @@ import ( // source http://www.cilinder.be/docs/next/NeXTStep/3.3/nd/DevTools/14_MachO/MachO.htmld/index.html const ( - machoNPExt uint8 = 0x10 /* N_PEXT: private external symbol bit */ - machoNExt uint8 = 0x01 /* N_EXT: external symbol bit, set for external symbols */ + machoNStab uint8 = 0xe0 // N_STAB mask for debugging symbols + machoNPExt uint8 = 0x10 // N_PEXT: private external symbol bit + machoNType uint8 = 0x0e // N_TYPE mask for symbol type + machoNExt uint8 = 0x01 // N_EXT: external symbol bit + + // N_TYPE values (after masking with 0x0e) + machoNUndf uint8 = 0x00 // undefined symbol + machoNAbs uint8 = 0x02 // absolute symbol + machoNSect uint8 = 0x0e // defined in section + machoNPbud uint8 = 0x0c // prebound 
undefined + machoNIndr uint8 = 0x0a // indirect symbol + // > #define LC_REQ_DYLD 0x80000000 // > #define LC_MAIN (0x28|LC_REQ_DYLD) /* replacement for LC_UNIXTHREAD */ lcMain = 0x28 | 0x80000000 ) -func findMachoFeatures(data *file.Executable, reader unionreader.UnionReader) error { +func findMachoFeatures(data *file.Executable, reader unionreader.UnionReader, cfg SymbolConfig) error { // TODO: support security features // a universal binary may have multiple architectures, so we need to check each one @@ -26,7 +36,7 @@ func findMachoFeatures(data *file.Executable, reader unionreader.UnionReader) er return err } - var libs []string + var libs, symbols []string for _, r := range readers { f, err := macho.NewFile(r) if err != nil { @@ -48,14 +58,113 @@ func findMachoFeatures(data *file.Executable, reader unionreader.UnionReader) er if !data.HasExports { data.HasExports = machoHasExports(f) } + + data.Toolchains = machoToolchains(reader, f) + if shouldCaptureSymbols(data, cfg) { + symbols = machoNMSymbols(f, cfg, data.Toolchains) + } } - // de-duplicate libraries + // de-duplicate libraries andn symbols data.ImportedLibraries = internal.NewSet(libs...).ToSlice() + data.SymbolNames = internal.NewSet(symbols...).ToSlice() return nil } +func machoToolchains(reader unionreader.UnionReader, f *macho.File) []file.Toolchain { + return includeNoneNil( + golangToolchainEvidence(reader), + ) +} + +func machoNMSymbols(f *macho.File, cfg SymbolConfig, toolchains []file.Toolchain) []string { + if isGoToolchainPresent(toolchains) { + return captureMachoGoSymbols(f, cfg) + } + + // TODO: capture other symbol types (non-go) based on the scope selection (lib, app, etc) + return nil +} + +func captureMachoGoSymbols(f *macho.File, cfg SymbolConfig) []string { + var symbols []string + filter := createGoSymbolFilter(cfg.Go) + for _, sym := range f.Symtab.Syms { + name, include := filter(sym.Name, machoSymbolType(sym, f.Sections)) + if include { + symbols = append(symbols, name) + } + } + 
return symbols +} + +func isGoToolchainPresent(toolchains []file.Toolchain) bool { + for _, tc := range toolchains { + if tc.Name == "go" { + return true + } + } + return false +} + +func machoSymbolType(s macho.Symbol, sections []*macho.Section) string { + // stab (debugging) symbols get '-' + if s.Type&machoNStab != 0 { + return "-" + } + + isExternal := s.Type&machoNExt != 0 + symType := s.Type & machoNType + + var typeChar byte + switch symType { + case machoNUndf, machoNPbud: + typeChar = 'U' + case machoNAbs: + typeChar = 'A' + case machoNSect: + typeChar = machoSectionTypeChar(s.Sect, sections) + case machoNIndr: + typeChar = 'I' + default: + typeChar = '?' + } + + // lowercase for local symbols, uppercase for external + if !isExternal && typeChar != '-' && typeChar != '?' { + typeChar = typeChar + 32 // convert to lowercase + } + + return string(typeChar) +} + +// machoSectionTypeChar returns the nm-style character for a section-defined symbol. +// Section numbers are 1-based; 0 means NO_SECT. 
+func machoSectionTypeChar(sect uint8, sections []*macho.Section) byte { + if sect == 0 || int(sect) > len(sections) { + return 'S' + } + + section := sections[sect-1] + seg := section.Seg + + // match nm behavior based on segment and section names + switch seg { + case "__TEXT": + return 'T' + case "__DATA", "__DATA_CONST": + switch section.Name { + case "__bss", "__common": + return 'B' + default: + return 'D' + } + default: + return 'S' + } +} + func machoHasEntrypoint(f *macho.File) bool { // derived from struct entry_point_command found from which explicitly calls out LC_MAIN: // https://opensource.apple.com/source/xnu/xnu-2050.18.24/EXTERNAL_HEADERS/mach-o/loader.h diff --git a/syft/file/cataloger/executable/macho_test.go b/syft/file/cataloger/executable/macho_test.go index ed8816671..568cd12b4 100644 --- a/syft/file/cataloger/executable/macho_test.go +++ b/syft/file/cataloger/executable/macho_test.go @@ -112,7 +112,7 @@ func Test_machoUniversal(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var data file.Executable - err := findMachoFeatures(&data, readerForFixture(t, tt.fixture)) + err := findMachoFeatures(&data, readerForFixture(t, tt.fixture), SymbolConfig{}) require.NoError(t, err) assert.Equal(t, tt.want.HasEntrypoint, data.HasEntrypoint) @@ -120,3 +120,341 @@ func Test_machoUniversal(t *testing.T) { }) } } + +func Test_machoNMSymbols_nonGoReturnsNil(t *testing.T) { + // for non-Go binaries, machoNMSymbols should return nil since we only support Go for now + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/shared-info", fixture)) + require.NoError(t, err) + return f + } + + f, err := macho.NewFile(readerForFixture(t, "bin/hello_mac")) + require.NoError(t, err) + + // no Go toolchain present + toolchains := []file.Toolchain{} + cfg := SymbolConfig{} + + symbols := machoNMSymbols(f, cfg, toolchains) + assert.Nil(t, symbols, 
"expected nil symbols for non-Go binary") +} + +func Test_machoGoToolchainDetection(t *testing.T) { + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + wantPresent bool + }{ + { + name: "go binary has toolchain", + fixture: "bin/hello_mac", + wantPresent: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + f, err := macho.NewFile(reader) + require.NoError(t, err) + + toolchains := machoToolchains(reader, f) + assert.Equal(t, tt.wantPresent, isGoToolchainPresent(toolchains)) + + if tt.wantPresent { + require.NotEmpty(t, toolchains) + assert.Equal(t, "go", toolchains[0].Name) + assert.NotEmpty(t, toolchains[0].Version) + assert.Equal(t, file.ToolchainKindCompiler, toolchains[0].Kind) + } + }) + } +} + +func Test_machoGoSymbolCapture(t *testing.T) { + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + cfg GoSymbolConfig + wantSymbols []string // exact symbol names that must be present + wantMinSymbolCount int + }{ + { + name: "capture all symbol types", + fixture: "bin/hello_mac", + cfg: GoSymbolConfig{ + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + // stdlib - fmt package (used via fmt.Println) + "fmt.(*fmt).fmtInteger", + "fmt.(*pp).doPrintf", + // stdlib - strings package (used via strings.ToUpper) + "strings.ToUpper", + "strings.Map", + // stdlib - encoding/json package (used via json.Marshal) + "encoding/json.Marshal", + // extended stdlib - 
golang.org/x/text (used via language.English) + "golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Language.String", + // third-party - go-spew (used via spew.Dump) + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.fdump", + }, + wantMinSymbolCount: 50, + }, + { + name: "capture only third-party symbols", + fixture: "bin/hello_mac", + cfg: GoSymbolConfig{ + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.(*formatState).Format", + "github.com/davecgh/go-spew/spew.fdump", + }, + }, + { + name: "capture only extended stdlib symbols", + fixture: "bin/hello_mac", + cfg: GoSymbolConfig{ + ExtendedStandardLibrary: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Parse", + }, + }, + { + name: "capture with text section types only", + fixture: "bin/hello_mac", + cfg: GoSymbolConfig{ + Types: []string{"T", "t"}, // text section (code) symbols + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + wantSymbols: []string{ + "encoding/json.Marshal", + "strings.ToUpper", + }, + wantMinSymbolCount: 10, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + f, err := macho.NewFile(reader) + require.NoError(t, err) + + symbols := captureMachoGoSymbols(f, SymbolConfig{Go: tt.cfg}) + symbolSet := make(map[string]struct{}, len(symbols)) + for _, sym := range symbols { + symbolSet[sym] = struct{}{} + } + + if tt.wantMinSymbolCount > 0 { + assert.GreaterOrEqual(t, len(symbols), tt.wantMinSymbolCount, + "expected at least %d symbols, got %d", tt.wantMinSymbolCount, len(symbols)) + } + 
+ for _, want := range tt.wantSymbols { + _, found := symbolSet[want] + assert.True(t, found, "expected symbol %q to be present", want) + } + }) + } +} + +func Test_machoNMSymbols_goReturnsSymbols(t *testing.T) { + // for Go binaries, machoNMSymbols should return symbols when Go toolchain is present + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + reader := readerForFixture(t, "bin/hello_mac") + f, err := macho.NewFile(reader) + require.NoError(t, err) + + toolchains := []file.Toolchain{ + {Name: "go", Version: "1.24", Kind: file.ToolchainKindCompiler}, + } + cfg := SymbolConfig{ + Go: GoSymbolConfig{ + Types: []string{"T", "t"}, + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + }, + } + + symbols := machoNMSymbols(f, cfg, toolchains) + assert.NotNil(t, symbols, "expected symbols for Go binary") + assert.NotEmpty(t, symbols, "expected non-empty symbols for Go binary") +} + +func Test_machoSymbolType(t *testing.T) { + // create minimal sections for testing + textSection := &macho.Section{SectionHeader: macho.SectionHeader{Seg: "__TEXT"}} + dataSection := &macho.Section{SectionHeader: macho.SectionHeader{Seg: "__DATA"}} + bssSection := &macho.Section{SectionHeader: macho.SectionHeader{Seg: "__DATA", Name: "__bss"}} + + tests := []struct { + name string + sym macho.Symbol + sections []*macho.Section + want string + }{ + { + name: "undefined external symbol", + sym: macho.Symbol{ + Type: machoNExt, // external, undefined (N_TYPE = 0 = N_UNDF) + }, + want: "U", + }, + { + name: "absolute external symbol", + sym: macho.Symbol{ + Type: machoNExt | machoNAbs, // external, absolute + }, + want: "A", + }, + { + name: "absolute local symbol", + sym: macho.Symbol{ + Type: machoNAbs, // local, absolute + }, + want: "a", + }, + { + name: "text section 
external", + sym: macho.Symbol{ + Type: machoNExt | machoNSect, // external, section-defined + Sect: 1, + }, + sections: []*macho.Section{textSection}, + want: "T", + }, + { + name: "text section local", + sym: macho.Symbol{ + Type: machoNSect, // local, section-defined + Sect: 1, + }, + sections: []*macho.Section{textSection}, + want: "t", + }, + { + name: "data section external", + sym: macho.Symbol{ + Type: machoNExt | machoNSect, + Sect: 1, + }, + sections: []*macho.Section{dataSection}, + want: "D", + }, + { + name: "bss section external", + sym: macho.Symbol{ + Type: machoNExt | machoNSect, + Sect: 1, + }, + sections: []*macho.Section{bssSection}, + want: "B", + }, + { + name: "stab debugging symbol", + sym: macho.Symbol{ + Type: machoNStab, // any stab symbol + }, + want: "-", + }, + { + name: "indirect symbol", + sym: macho.Symbol{ + Type: machoNExt | machoNIndr, + }, + want: "I", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := machoSymbolType(tt.sym, tt.sections) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_isGoToolchainPresent(t *testing.T) { + tests := []struct { + name string + toolchains []file.Toolchain + want bool + }{ + { + name: "empty toolchains", + toolchains: []file.Toolchain{}, + want: false, + }, + { + name: "go toolchain present", + toolchains: []file.Toolchain{ + {Name: "go", Version: "1.21.0", Kind: file.ToolchainKindCompiler}, + }, + want: true, + }, + { + name: "other toolchain only", + toolchains: []file.Toolchain{ + {Name: "gcc", Version: "12.0", Kind: file.ToolchainKindCompiler}, + }, + want: false, + }, + { + name: "go among multiple toolchains", + toolchains: []file.Toolchain{ + {Name: "gcc", Version: "12.0", Kind: file.ToolchainKindCompiler}, + {Name: "go", Version: "1.21.0", Kind: file.ToolchainKindCompiler}, + }, + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isGoToolchainPresent(tt.toolchains) + assert.Equal(t, tt.want, got) + 
}) + } +} diff --git a/syft/file/cataloger/executable/test-fixtures/golang/Dockerfile b/syft/file/cataloger/executable/test-fixtures/golang/Dockerfile new file mode 100644 index 000000000..6ec16bcec --- /dev/null +++ b/syft/file/cataloger/executable/test-fixtures/golang/Dockerfile @@ -0,0 +1,25 @@ +# Stage 1: Build binaries for multiple platforms +FROM golang:1.24 AS builder + +WORKDIR /app + +COPY go.mod go.sum ./ +RUN go mod download + +COPY main.go ./ + +# build ELF (Linux) +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o hello_linux . + +# build Mach-O (macOS) +RUN CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -o hello_mac . + +# build PE (Windows) +RUN CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build -o hello.exe . + +# Stage 2: Minimal image with just the binaries +FROM scratch + +COPY --from=builder /app/hello_linux / +COPY --from=builder /app/hello_mac / +COPY --from=builder /app/hello.exe / diff --git a/syft/file/cataloger/executable/test-fixtures/golang/Makefile b/syft/file/cataloger/executable/test-fixtures/golang/Makefile new file mode 100644 index 000000000..71a5c73a0 --- /dev/null +++ b/syft/file/cataloger/executable/test-fixtures/golang/Makefile @@ -0,0 +1,42 @@ +BIN=./bin +TOOL_IMAGE=localhost/syft-golang-build-tools:latest +FINGERPRINT_FILE=$(BIN).fingerprint + +ifndef BIN + $(error BIN is not set) +endif + +.DEFAULT_GOAL := fixtures + +# requirement 1: 'fixtures' goal to generate any and all test fixtures +fixtures: build + +# requirement 2: 'fingerprint' goal to determine if the fixture input that indicates any existing cache should be busted +fingerprint: $(FINGERPRINT_FILE) + +tools-check: + @sha256sum -c Dockerfile.sha256 || (echo "Tools Dockerfile has changed" && exit 1) + +tools: + @(docker inspect $(TOOL_IMAGE) > /dev/null && make tools-check) || \ + (docker build -t $(TOOL_IMAGE) . 
&& sha256sum Dockerfile > Dockerfile.sha256) + +build: tools + @mkdir -p $(BIN) + docker run -i -v $(shell pwd)/$(BIN):/out $(TOOL_IMAGE) sh -c \ + "cp /hello_linux /hello_mac /hello.exe /out/" + +debug: + docker run -it --rm -v $(shell pwd):/mount -w /mount $(TOOL_IMAGE) sh + +# requirement 3: we always need to recalculate the fingerprint based on source regardless of any existing fingerprint +.PHONY: $(FINGERPRINT_FILE) +$(FINGERPRINT_FILE): + @find . -maxdepth 1 -type f \( -name "*.go" -o -name "go.*" -o -name "Dockerfile" -o -name "Makefile" \) \ + -exec sha256sum {} \; | sort -k2 > $(FINGERPRINT_FILE) + +# requirement 4: 'clean' goal to remove all generated test fixtures +clean: + rm -rf $(BIN) Dockerfile.sha256 $(FINGERPRINT_FILE) + +.PHONY: tools tools-check build debug clean fixtures fingerprint diff --git a/syft/file/cataloger/executable/test-fixtures/golang/go.mod b/syft/file/cataloger/executable/test-fixtures/golang/go.mod new file mode 100644 index 000000000..4c7e7c0ba --- /dev/null +++ b/syft/file/cataloger/executable/test-fixtures/golang/go.mod @@ -0,0 +1,8 @@ +module x + +go 1.24.4 + +require ( + github.com/davecgh/go-spew v1.1.1 + golang.org/x/text v0.21.0 +) diff --git a/syft/file/cataloger/executable/test-fixtures/golang/go.sum b/syft/file/cataloger/executable/test-fixtures/golang/go.sum new file mode 100644 index 000000000..04d2f50ec --- /dev/null +++ b/syft/file/cataloger/executable/test-fixtures/golang/go.sum @@ -0,0 +1,4 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= diff --git a/syft/file/cataloger/executable/test-fixtures/golang/main.go b/syft/file/cataloger/executable/test-fixtures/golang/main.go new file mode 100644 index 000000000..d660a122c --- /dev/null 
+++ b/syft/file/cataloger/executable/test-fixtures/golang/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "strings" + + "github.com/davecgh/go-spew/spew" + "golang.org/x/text/language" +) + +func main() { + // use stdlib packages + fmt.Println("Hello from Go!") + fmt.Println(strings.ToUpper("test")) + + // use golang.org/x package + tag := language.English + fmt.Println(tag.String()) + + // use third-party package + spew.Dump(os.Args) + + // use encoding/json + data, _ := json.Marshal(map[string]string{"hello": "world"}) + fmt.Println(string(data)) +} diff --git a/syft/file/executable.go b/syft/file/executable.go index 454b1bf9f..73de333e5 100644 --- a/syft/file/executable.go +++ b/syft/file/executable.go @@ -6,9 +6,17 @@ type ( // RelocationReadOnly indicates the RELRO security protection level applied to an ELF binary. RelocationReadOnly string + + //SymbolType string + + ToolchainKind string ) const ( + ToolchainKindCompiler ToolchainKind = "compiler" + ToolchainKindLinker ToolchainKind = "linker" + ToolchainKindRuntime ToolchainKind = "runtime" + ELF ExecutableFormat = "elf" // Executable and Linkable Format used on Unix-like systems MachO ExecutableFormat = "macho" // Mach object file format used on macOS and iOS PE ExecutableFormat = "pe" // Portable Executable format used on Windows @@ -16,6 +24,18 @@ const ( RelocationReadOnlyNone RelocationReadOnly = "none" // no RELRO protection RelocationReadOnlyPartial RelocationReadOnly = "partial" // partial RELRO protection RelocationReadOnlyFull RelocationReadOnly = "full" // full RELRO protection + + //// from https://pkg.go.dev/cmd/nm + //SymbolTypeText SymbolType = "T" // text (code) segment symbol + //SymbolTypeTextStatic SymbolType = "t" // static text segment symbol + //SymbolTypeReadOnly SymbolType = "R" // read-only data segment symbol + //SymbolTypeReadOnlyStatic SymbolType = "r" // static read-only data segment symbol + //SymbolTypeData SymbolType = "D" // data segment 
symbol + //SymbolTypeDataStatic SymbolType = "d" // static data segment symbol + //SymbolTypeBSS SymbolType = "B" // bss segment symbol + //SymbolTypeBSSStatic SymbolType = "b" // static bss segment symbol + //SymbolTypeConstant SymbolType = "C" // constant address + //SymbolTypeUndefined SymbolType = "U" // referenced but undefined symbol ) // Executable contains metadata about binary files and their security features. @@ -34,8 +54,29 @@ type Executable struct { // ELFSecurityFeatures contains ELF-specific security hardening information when Format is ELF. ELFSecurityFeatures *ELFSecurityFeatures `json:"elfSecurityFeatures,omitempty" yaml:"elfSecurityFeatures" mapstructure:"elfSecurityFeatures"` + + // Symbols captures the selection from the symbol table found in the binary. + //Symbols []Symbol `json:"symbols,omitempty" yaml:"symbols" mapstructure:"symbols"` + SymbolNames []string `json:"symbolNames,omitempty" yaml:"symbolNames" mapstructure:"symbolNames"` + + // Toolchains captures information about the compiler, linker, runtime, or other toolchains used to build (or otherwise exist within) the executable. + Toolchains []Toolchain `json:"toolchains,omitempty" yaml:"toolchains" mapstructure:"toolchains"` } +type Toolchain struct { + Name string `json:"name" yaml:"name" mapstructure:"name"` + Version string `json:"version,omitempty" yaml:"version,omitempty" mapstructure:"version"` + Kind ToolchainKind `json:"kind" yaml:"kind" mapstructure:"kind"` + + // TODO: should we allow for aux information here? free form? +} + +//type Symbol struct { +// //Type SymbolType `json:"type" yaml:"type" mapstructure:"type"` +// Type string `json:"type" yaml:"type" mapstructure:"type"` +// Name string `json:"name" yaml:"name" mapstructure:"name"` +//} + // ELFSecurityFeatures captures security hardening and protection mechanisms in ELF binaries. type ELFSecurityFeatures struct { // SymbolTableStripped indicates whether debugging symbols have been removed.