// Numeric values from the PE/COFF format that are consulted when deriving
// nm-style symbol type letters for PE binaries.
const (
	// symbol storage classes
	peSymClassExternal = 2 // IMAGE_SYM_CLASS_EXTERNAL - external symbol
	peSymClassStatic   = 3 // IMAGE_SYM_CLASS_STATIC - static symbol

	// section characteristic flags (IMAGE_SCN_CNT_*)
	peSectionCntCode              = 0x00000020 // IMAGE_SCN_CNT_CODE
	peSectionCntInitializedData   = 0x00000040 // IMAGE_SCN_CNT_INITIALIZED_DATA
	peSectionCntUninitializedData = 0x00000080 // IMAGE_SCN_CNT_UNINITIALIZED_DATA

	// section characteristic flags (IMAGE_SCN_MEM_*)
	peSectionMemExecute = 0x20000000 // IMAGE_SCN_MEM_EXECUTE
	peSectionMemRead    = 0x40000000 // IMAGE_SCN_MEM_READ
	peSectionMemWrite   = 0x80000000 // IMAGE_SCN_MEM_WRITE
)
= 0x00000080 // IMAGE_SCN_CNT_UNINITIALIZED_DATA + peSectionMemExecute = 0x20000000 // IMAGE_SCN_MEM_EXECUTE + peSectionMemRead = 0x40000000 // IMAGE_SCN_MEM_READ + peSectionMemWrite = 0x80000000 // IMAGE_SCN_MEM_WRITE +) + +func findPEFeatures(data *file.Executable, reader unionreader.UnionReader, cfg SymbolConfig) error { // TODO: support security features f, err := pe.NewFile(reader) @@ -25,6 +42,10 @@ func findPEFeatures(data *file.Executable, reader unionreader.UnionReader) error data.ImportedLibraries = libs data.HasEntrypoint = peHasEntrypoint(f) data.HasExports = peHasExports(f) + data.Toolchains = peToolchains(reader) + if shouldCaptureSymbols(data, cfg) { + data.SymbolNames = peNMSymbols(f, cfg, data.Toolchains) + } return nil } @@ -82,3 +103,104 @@ func peHasExports(f *pe.File) bool { return false } + +func peToolchains(reader unionreader.UnionReader) []file.Toolchain { + return includeNoneNil( + golangToolchainEvidence(reader), + ) +} + +func peNMSymbols(f *pe.File, cfg SymbolConfig, toolchains []file.Toolchain) []string { + if isGoToolchainPresent(toolchains) { + return capturePeGoSymbols(f, cfg) + } + + // include all symbols for non-Go binaries + if f.Symbols == nil { + return nil + } + var symbols []string + for _, sym := range f.Symbols { + symbols = append(symbols, sym.Name) + } + return symbols +} + +func capturePeGoSymbols(f *pe.File, cfg SymbolConfig) []string { + if f.Symbols == nil { + return nil + } + + var symbols []string + filter := createGoSymbolFilter(cfg) + for _, sym := range f.Symbols { + name, include := filter(sym.Name, peSymbolType(sym, f.Sections)) + if include { + symbols = append(symbols, name) + } + } + return symbols +} + +// peSymbolType returns the nm-style single character representing the symbol type. +// This mimics the output of `nm` for PE/COFF binaries. 
+func peSymbolType(sym *pe.Symbol, sections []*pe.Section) string { + // handle special section numbers first + switch sym.SectionNumber { + case 0: + // IMAGE_SYM_UNDEFINED - undefined symbol + return "U" + case -1: + // IMAGE_SYM_ABSOLUTE - absolute symbol + if sym.StorageClass == peSymClassExternal { + return "A" + } + return "a" + case -2: + // IMAGE_SYM_DEBUG - debugging symbol + return "-" + } + + // for defined symbols, determine type based on section characteristics + typeChar := peSectionTypeChar(sym.SectionNumber, sections) + + // lowercase for static (local) symbols, uppercase for external (global) + if sym.StorageClass != peSymClassExternal && typeChar != '-' && typeChar != '?' { + return strings.ToLower(string(typeChar)) + } + return string(typeChar) +} + +// peSectionTypeChar returns the nm-style character based on section characteristics. +// Section numbers are 1-based. +func peSectionTypeChar(sectNum int16, sections []*pe.Section) byte { + idx := int(sectNum) - 1 // convert to 0-based index + if idx < 0 || idx >= len(sections) { + return '?' 
+ } + + section := sections[idx] + chars := section.Characteristics + + // determine symbol type based on section characteristics + switch { + case chars&peSectionMemExecute != 0 || chars&peSectionCntCode != 0: + // executable section -> text + return 'T' + + case chars&peSectionCntUninitializedData != 0: + // uninitialized data section -> BSS + return 'B' + + case chars&peSectionMemWrite == 0 && chars&peSectionCntInitializedData != 0: + // read-only initialized data -> rodata + return 'R' + + case chars&peSectionCntInitializedData != 0: + // writable initialized data -> data + return 'D' + + default: + return 'D' + } +} diff --git a/syft/file/cataloger/executable/pe_test.go b/syft/file/cataloger/executable/pe_test.go index 59ea5bc47..2c658f902 100644 --- a/syft/file/cataloger/executable/pe_test.go +++ b/syft/file/cataloger/executable/pe_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/internal/unionreader" ) @@ -78,3 +79,300 @@ func Test_peHasExports(t *testing.T) { }) } } + +func Test_peGoToolchainDetection(t *testing.T) { + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + wantPresent bool + }{ + { + name: "go binary has toolchain", + fixture: "bin/hello.exe", + wantPresent: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + + toolchains := peToolchains(reader) + assert.Equal(t, tt.wantPresent, isGoToolchainPresent(toolchains)) + + if tt.wantPresent { + require.NotEmpty(t, toolchains) + assert.Equal(t, "go", toolchains[0].Name) + assert.NotEmpty(t, toolchains[0].Version) + assert.Equal(t, file.ToolchainKindCompiler, toolchains[0].Kind) + } + }) + } +} + 
+func Test_peGoSymbolCapture(t *testing.T) { + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + tests := []struct { + name string + fixture string + cfg SymbolConfig + wantSymbols []string // exact symbol names that must be present + wantMinSymbolCount int + }{ + { + name: "capture all symbol types", + fixture: "bin/hello.exe", + cfg: SymbolConfig{ + Go: GoSymbolConfig{ + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + }, + wantSymbols: []string{ + // stdlib - fmt package (used via fmt.Println) + "fmt.(*fmt).fmtInteger", + "fmt.(*pp).doPrintf", + // stdlib - strings package (used via strings.ToUpper) + "strings.ToUpper", + "strings.Map", + // stdlib - encoding/json package (used via json.Marshal) + "encoding/json.Marshal", + // extended stdlib - golang.org/x/text (used via language.English) + "golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Language.String", + // third-party - go-spew (used via spew.Dump) + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.fdump", + }, + wantMinSymbolCount: 50, + }, + { + name: "capture only third-party symbols", + fixture: "bin/hello.exe", + cfg: SymbolConfig{ + Go: GoSymbolConfig{ + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + }, + wantSymbols: []string{ + "github.com/davecgh/go-spew/spew.(*dumpState).dump", + "github.com/davecgh/go-spew/spew.(*formatState).Format", + "github.com/davecgh/go-spew/spew.fdump", + }, + }, + { + name: "capture only extended stdlib symbols", + fixture: "bin/hello.exe", + cfg: SymbolConfig{ + Go: GoSymbolConfig{ + ExtendedStandardLibrary: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + }, + wantSymbols: []string{ + 
"golang.org/x/text/internal/language.Tag.String", + "golang.org/x/text/internal/language.Parse", + }, + }, + { + name: "capture with text section types only", + fixture: "bin/hello.exe", + cfg: SymbolConfig{ + Types: []string{"T", "t"}, // text section (code) symbols + Go: GoSymbolConfig{ + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + UnexportedSymbols: true, + }, + }, + wantSymbols: []string{ + "encoding/json.Marshal", + "strings.ToUpper", + }, + wantMinSymbolCount: 10, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := readerForFixture(t, tt.fixture) + f, err := pe.NewFile(reader) + require.NoError(t, err) + + symbols := capturePeGoSymbols(f, tt.cfg) + symbolSet := make(map[string]struct{}, len(symbols)) + for _, sym := range symbols { + symbolSet[sym] = struct{}{} + } + + if tt.wantMinSymbolCount > 0 { + assert.GreaterOrEqual(t, len(symbols), tt.wantMinSymbolCount, + "expected at least %d symbols, got %d", tt.wantMinSymbolCount, len(symbols)) + } + + for _, want := range tt.wantSymbols { + _, found := symbolSet[want] + assert.True(t, found, "expected symbol %q to be present", want) + } + }) + } +} + +func Test_peNMSymbols_goReturnsSymbols(t *testing.T) { + // for Go binaries, peNMSymbols should return symbols when Go toolchain is present + readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader { + t.Helper() + f, err := os.Open(filepath.Join("test-fixtures/golang", fixture)) + require.NoError(t, err) + return f + } + + reader := readerForFixture(t, "bin/hello.exe") + f, err := pe.NewFile(reader) + require.NoError(t, err) + + toolchains := []file.Toolchain{ + {Name: "go", Version: "1.24", Kind: file.ToolchainKindCompiler}, + } + cfg := SymbolConfig{ + Types: []string{"T", "t"}, + Go: GoSymbolConfig{ + StandardLibrary: true, + ExtendedStandardLibrary: true, + ThirdPartyModules: true, + ExportedSymbols: true, + }, + } + + symbols := 
peNMSymbols(f, cfg, toolchains) + assert.NotNil(t, symbols, "expected symbols for Go binary") + assert.NotEmpty(t, symbols, "expected non-empty symbols for Go binary") +} + +func Test_peSymbolType(t *testing.T) { + // create minimal sections for testing + textSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntCode | peSectionMemExecute | peSectionMemRead}} + dataSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntInitializedData | peSectionMemRead | peSectionMemWrite}} + rdataSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntInitializedData | peSectionMemRead}} + bssSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntUninitializedData | peSectionMemRead | peSectionMemWrite}} + + tests := []struct { + name string + sym *pe.Symbol + sections []*pe.Section + want string + }{ + { + name: "undefined symbol", + sym: &pe.Symbol{ + SectionNumber: 0, + StorageClass: peSymClassExternal, + }, + want: "U", + }, + { + name: "absolute symbol external", + sym: &pe.Symbol{ + SectionNumber: -1, + StorageClass: peSymClassExternal, + }, + want: "A", + }, + { + name: "absolute symbol static", + sym: &pe.Symbol{ + SectionNumber: -1, + StorageClass: peSymClassStatic, + }, + want: "a", + }, + { + name: "debug symbol", + sym: &pe.Symbol{ + SectionNumber: -2, + StorageClass: peSymClassExternal, + }, + want: "-", + }, + { + name: "text section external", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassExternal, + }, + sections: []*pe.Section{textSection}, + want: "T", + }, + { + name: "text section static", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassStatic, + }, + sections: []*pe.Section{textSection}, + want: "t", + }, + { + name: "data section external", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassExternal, + }, + sections: []*pe.Section{dataSection}, + want: "D", + }, + { + name: "data section 
static", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassStatic, + }, + sections: []*pe.Section{dataSection}, + want: "d", + }, + { + name: "rodata section external", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassExternal, + }, + sections: []*pe.Section{rdataSection}, + want: "R", + }, + { + name: "bss section external", + sym: &pe.Symbol{ + SectionNumber: 1, + StorageClass: peSymClassExternal, + }, + sections: []*pe.Section{bssSection}, + want: "B", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := peSymbolType(tt.sym, tt.sections) + assert.Equal(t, tt.want, got) + }) + } +}