add support for PE binaries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-12-10 13:53:00 -05:00
parent 703edff876
commit bf1f0ceea3
3 changed files with 422 additions and 2 deletions

View File

@ -256,7 +256,7 @@ func (i *Cataloger) processExecutable(loc file.Location, reader unionreader.Unio
err = fmt.Errorf("unable to determine ELF features: %w", err)
}
case file.PE:
if err = findPEFeatures(&data, reader); err != nil {
if err = findPEFeatures(&data, reader, i.config.Symbols); err != nil {
log.WithFields("error", err, "path", loc.RealPath).Trace("unable to determine PE features")
err = fmt.Errorf("unable to determine PE features: %w", err)
}

View File

@ -2,6 +2,7 @@ package executable
import (
"debug/pe"
"strings"
"github.com/scylladb/go-set/strset"
@ -9,7 +10,23 @@ import (
"github.com/anchore/syft/syft/internal/unionreader"
)
func findPEFeatures(data *file.Executable, reader unionreader.UnionReader) error {
// PE symbol storage class constants.
// These are compared against pe.Symbol.StorageClass when deriving the nm-style
// type character of a symbol (external symbols are reported in uppercase).
const (
	peSymClassExternal = 2 // IMAGE_SYM_CLASS_EXTERNAL - external symbol
	peSymClassStatic   = 3 // IMAGE_SYM_CLASS_STATIC - static symbol
)
// PE section characteristic flags.
// Each value is a single-bit mask that is tested against
// pe.Section.Characteristics to classify the section a symbol is defined in.
const (
	peSectionCntCode              = 0x00000020 // IMAGE_SCN_CNT_CODE - section contains code
	peSectionCntInitializedData   = 0x00000040 // IMAGE_SCN_CNT_INITIALIZED_DATA - section contains initialized data
	peSectionCntUninitializedData = 0x00000080 // IMAGE_SCN_CNT_UNINITIALIZED_DATA - section contains uninitialized data
	peSectionMemExecute           = 0x20000000 // IMAGE_SCN_MEM_EXECUTE - section can be executed
	peSectionMemRead              = 0x40000000 // IMAGE_SCN_MEM_READ - section can be read
	peSectionMemWrite             = 0x80000000 // IMAGE_SCN_MEM_WRITE - section can be written to
)
func findPEFeatures(data *file.Executable, reader unionreader.UnionReader, cfg SymbolConfig) error {
// TODO: support security features
f, err := pe.NewFile(reader)
@ -25,6 +42,10 @@ func findPEFeatures(data *file.Executable, reader unionreader.UnionReader) error
data.ImportedLibraries = libs
data.HasEntrypoint = peHasEntrypoint(f)
data.HasExports = peHasExports(f)
data.Toolchains = peToolchains(reader)
if shouldCaptureSymbols(data, cfg) {
data.SymbolNames = peNMSymbols(f, cfg, data.Toolchains)
}
return nil
}
@ -82,3 +103,104 @@ func peHasExports(f *pe.File) bool {
return false
}
// peToolchains collects the toolchain evidence that can be detected from the
// given PE binary. Go is the only toolchain probed here.
// NOTE(review): presumably includeNoneNil drops missing (nil) evidence — confirm against its definition.
func peToolchains(reader unionreader.UnionReader) []file.Toolchain {
	return includeNoneNil(golangToolchainEvidence(reader))
}
// peNMSymbols returns the symbol names to record for a PE binary.
//
// When a Go toolchain is present in toolchains, selection is delegated to
// capturePeGoSymbols so that cfg is applied. For any other binary every name
// in the COFF symbol table is returned, in table order.
// NOTE(review): cfg is not consulted on the non-Go path — confirm this is intended.
//
// On the non-Go path a nil slice is returned when the file has no COFF symbols.
func peNMSymbols(f *pe.File, cfg SymbolConfig, toolchains []file.Toolchain) []string {
	if isGoToolchainPresent(toolchains) {
		return capturePeGoSymbols(f, cfg)
	}

	// include all symbols for non-Go binaries
	if len(f.Symbols) == 0 {
		return nil
	}

	// the final length is known up front, so allocate once instead of
	// repeatedly growing the slice while appending
	names := make([]string, 0, len(f.Symbols))
	for _, sym := range f.Symbols {
		names = append(names, sym.Name)
	}
	return names
}
// capturePeGoSymbols walks the COFF symbol table of a Go-built PE binary and
// returns the names accepted by the filter built from cfg. Each symbol is
// offered to the filter together with its nm-style type character; the name
// recorded is the one the filter hands back. Returns nil when the file has no
// symbol table.
func capturePeGoSymbols(f *pe.File, cfg SymbolConfig) []string {
	if f.Symbols == nil {
		return nil
	}

	var (
		keep   = createGoSymbolFilter(cfg)
		result []string
	)
	for _, sym := range f.Symbols {
		if name, ok := keep(sym.Name, peSymbolType(sym, f.Sections)); ok {
			result = append(result, name)
		}
	}
	return result
}
// peSymbolType returns the nm-style single character representing the symbol
// type, mimicking the output of `nm` for PE/COFF binaries.
//
// Reserved section numbers map to fixed characters: 0 -> "U" (undefined),
// -1 -> "A" or "a" (absolute), -2 -> "-" (debug). Any other symbol takes the
// character reported by peSectionTypeChar for its section, lowercased unless
// the symbol's storage class is external. "?" and "-" are never lowercased.
func peSymbolType(sym *pe.Symbol, sections []*pe.Section) string {
	external := sym.StorageClass == peSymClassExternal

	// reserved section numbers carry their own meaning; there is no section to inspect
	if sym.SectionNumber == 0 {
		// IMAGE_SYM_UNDEFINED
		return "U"
	}
	if sym.SectionNumber == -1 {
		// IMAGE_SYM_ABSOLUTE
		if external {
			return "A"
		}
		return "a"
	}
	if sym.SectionNumber == -2 {
		// IMAGE_SYM_DEBUG
		return "-"
	}

	// defined symbol: the section's characteristics decide the letter
	letter := peSectionTypeChar(sym.SectionNumber, sections)

	// uppercase is kept for external (global) symbols and for the placeholder characters
	if external || letter == '-' || letter == '?' {
		return string(letter)
	}

	// everything else is a local symbol and is reported in lowercase
	return strings.ToLower(string(letter))
}
// peSectionTypeChar returns the nm-style character for the section with the
// given 1-based section number, derived from that section's characteristics:
//
//	'T' - executable or code section
//	'B' - uninitialized data
//	'R' - initialized data that is not writable
//	'D' - anything else (including writable initialized data)
//	'?' - the section number does not refer to an entry in sections
func peSectionTypeChar(sectNum int16, sections []*pe.Section) byte {
	// section numbers are 1-based; anything outside [1, len(sections)] has no section to inspect
	if sectNum < 1 || int(sectNum) > len(sections) {
		return '?'
	}

	flags := sections[sectNum-1].Characteristics
	has := func(mask uint32) bool { return flags&mask != 0 }

	// the checks are ordered: code wins over BSS, which wins over read-only data
	if has(peSectionMemExecute) || has(peSectionCntCode) {
		return 'T'
	}
	if has(peSectionCntUninitializedData) {
		return 'B'
	}
	if has(peSectionCntInitializedData) && !has(peSectionMemWrite) {
		return 'R'
	}

	// writable initialized data and sections without any content flag both report as data
	return 'D'
}

View File

@ -9,6 +9,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader"
)
@ -78,3 +79,300 @@ func Test_peHasExports(t *testing.T) {
})
}
}
// Test_peGoToolchainDetection verifies that peToolchains reports the Go
// compiler toolchain (with a version) for a Go-built PE fixture.
func Test_peGoToolchainDetection(t *testing.T) {
	readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader {
		t.Helper()
		f, err := os.Open(filepath.Join("test-fixtures/golang", fixture))
		require.NoError(t, err)
		// nothing else closes the fixture, so release the handle when the (sub)test ends
		t.Cleanup(func() {
			_ = f.Close() // best-effort: a close failure on a read-only fixture is not what is under test
		})
		return f
	}

	tests := []struct {
		name        string
		fixture     string
		wantPresent bool
	}{
		{
			name:        "go binary has toolchain",
			fixture:     "bin/hello.exe",
			wantPresent: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			reader := readerForFixture(t, tt.fixture)

			toolchains := peToolchains(reader)
			assert.Equal(t, tt.wantPresent, isGoToolchainPresent(toolchains))

			if tt.wantPresent {
				// guard the index access below
				require.NotEmpty(t, toolchains)
				assert.Equal(t, "go", toolchains[0].Name)
				assert.NotEmpty(t, toolchains[0].Version)
				assert.Equal(t, file.ToolchainKindCompiler, toolchains[0].Kind)
			}
		})
	}
}
// Test_peGoSymbolCapture verifies that capturePeGoSymbols honors the symbol
// configuration when reading the symbol table of a Go-built PE fixture: each
// case lists symbols that must be present and, optionally, a lower bound on
// the number of captured symbols.
func Test_peGoSymbolCapture(t *testing.T) {
	readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader {
		t.Helper()
		f, err := os.Open(filepath.Join("test-fixtures/golang", fixture))
		require.NoError(t, err)
		// pe.NewFile does not take ownership of the reader, so release the handle when the (sub)test ends
		t.Cleanup(func() {
			_ = f.Close() // best-effort: a close failure on a read-only fixture is not what is under test
		})
		return f
	}

	tests := []struct {
		name               string
		fixture            string
		cfg                SymbolConfig
		wantSymbols        []string // exact symbol names that must be present
		wantMinSymbolCount int
	}{
		{
			name:    "capture all symbol types",
			fixture: "bin/hello.exe",
			cfg: SymbolConfig{
				Go: GoSymbolConfig{
					StandardLibrary:         true,
					ExtendedStandardLibrary: true,
					ThirdPartyModules:       true,
					ExportedSymbols:         true,
					UnexportedSymbols:       true,
				},
			},
			wantSymbols: []string{
				// stdlib - fmt package (used via fmt.Println)
				"fmt.(*fmt).fmtInteger",
				"fmt.(*pp).doPrintf",
				// stdlib - strings package (used via strings.ToUpper)
				"strings.ToUpper",
				"strings.Map",
				// stdlib - encoding/json package (used via json.Marshal)
				"encoding/json.Marshal",
				// extended stdlib - golang.org/x/text (used via language.English)
				"golang.org/x/text/internal/language.Tag.String",
				"golang.org/x/text/internal/language.Language.String",
				// third-party - go-spew (used via spew.Dump)
				"github.com/davecgh/go-spew/spew.(*dumpState).dump",
				"github.com/davecgh/go-spew/spew.fdump",
			},
			wantMinSymbolCount: 50,
		},
		{
			name:    "capture only third-party symbols",
			fixture: "bin/hello.exe",
			cfg: SymbolConfig{
				Go: GoSymbolConfig{
					ThirdPartyModules: true,
					ExportedSymbols:   true,
					UnexportedSymbols: true,
				},
			},
			wantSymbols: []string{
				"github.com/davecgh/go-spew/spew.(*dumpState).dump",
				"github.com/davecgh/go-spew/spew.(*formatState).Format",
				"github.com/davecgh/go-spew/spew.fdump",
			},
		},
		{
			name:    "capture only extended stdlib symbols",
			fixture: "bin/hello.exe",
			cfg: SymbolConfig{
				Go: GoSymbolConfig{
					ExtendedStandardLibrary: true,
					ExportedSymbols:         true,
					UnexportedSymbols:       true,
				},
			},
			wantSymbols: []string{
				"golang.org/x/text/internal/language.Tag.String",
				"golang.org/x/text/internal/language.Parse",
			},
		},
		{
			name:    "capture with text section types only",
			fixture: "bin/hello.exe",
			cfg: SymbolConfig{
				Types: []string{"T", "t"}, // text section (code) symbols
				Go: GoSymbolConfig{
					StandardLibrary:         true,
					ExtendedStandardLibrary: true,
					ThirdPartyModules:       true,
					ExportedSymbols:         true,
					UnexportedSymbols:       true,
				},
			},
			wantSymbols: []string{
				"encoding/json.Marshal",
				"strings.ToUpper",
			},
			wantMinSymbolCount: 10,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			reader := readerForFixture(t, tt.fixture)
			f, err := pe.NewFile(reader)
			require.NoError(t, err)

			symbols := capturePeGoSymbols(f, tt.cfg)

			// index the result once so each expected name is a constant-time lookup
			symbolSet := make(map[string]struct{}, len(symbols))
			for _, sym := range symbols {
				symbolSet[sym] = struct{}{}
			}

			if tt.wantMinSymbolCount > 0 {
				assert.GreaterOrEqual(t, len(symbols), tt.wantMinSymbolCount,
					"expected at least %d symbols, got %d", tt.wantMinSymbolCount, len(symbols))
			}

			for _, want := range tt.wantSymbols {
				_, found := symbolSet[want]
				assert.True(t, found, "expected symbol %q to be present", want)
			}
		})
	}
}
// Test_peNMSymbols_goReturnsSymbols verifies that peNMSymbols returns a
// non-empty symbol list for a Go-built PE fixture when a Go toolchain is
// passed in.
func Test_peNMSymbols_goReturnsSymbols(t *testing.T) {
	// for Go binaries, peNMSymbols should return symbols when Go toolchain is present
	readerForFixture := func(t *testing.T, fixture string) unionreader.UnionReader {
		t.Helper()
		f, err := os.Open(filepath.Join("test-fixtures/golang", fixture))
		require.NoError(t, err)
		// pe.NewFile does not take ownership of the reader, so release the handle when the test ends
		t.Cleanup(func() {
			_ = f.Close() // best-effort: a close failure on a read-only fixture is not what is under test
		})
		return f
	}

	reader := readerForFixture(t, "bin/hello.exe")
	f, err := pe.NewFile(reader)
	require.NoError(t, err)

	// the toolchain is supplied directly so this test does not depend on detection
	toolchains := []file.Toolchain{
		{Name: "go", Version: "1.24", Kind: file.ToolchainKindCompiler},
	}
	cfg := SymbolConfig{
		Types: []string{"T", "t"},
		Go: GoSymbolConfig{
			StandardLibrary:         true,
			ExtendedStandardLibrary: true,
			ThirdPartyModules:       true,
			ExportedSymbols:         true,
		},
	}

	symbols := peNMSymbols(f, cfg, toolchains)
	assert.NotNil(t, symbols, "expected symbols for Go binary")
	assert.NotEmpty(t, symbols, "expected non-empty symbols for Go binary")
}
// Test_peSymbolType verifies the nm-style type character produced by
// peSymbolType for the reserved section numbers, for each section kind in both
// external and static storage classes, and for section numbers that do not
// resolve to a section.
func Test_peSymbolType(t *testing.T) {
	// create minimal sections for testing
	textSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntCode | peSectionMemExecute | peSectionMemRead}}
	dataSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntInitializedData | peSectionMemRead | peSectionMemWrite}}
	rdataSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntInitializedData | peSectionMemRead}}
	bssSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionCntUninitializedData | peSectionMemRead | peSectionMemWrite}}
	// a section that carries no content flag at all exercises the default branch
	plainSection := &pe.Section{SectionHeader: pe.SectionHeader{Characteristics: peSectionMemRead}}

	tests := []struct {
		name     string
		sym      *pe.Symbol
		sections []*pe.Section
		want     string
	}{
		{
			name: "undefined symbol",
			sym: &pe.Symbol{
				SectionNumber: 0,
				StorageClass:  peSymClassExternal,
			},
			want: "U",
		},
		{
			name: "absolute symbol external",
			sym: &pe.Symbol{
				SectionNumber: -1,
				StorageClass:  peSymClassExternal,
			},
			want: "A",
		},
		{
			name: "absolute symbol static",
			sym: &pe.Symbol{
				SectionNumber: -1,
				StorageClass:  peSymClassStatic,
			},
			want: "a",
		},
		{
			name: "debug symbol",
			sym: &pe.Symbol{
				SectionNumber: -2,
				StorageClass:  peSymClassExternal,
			},
			want: "-",
		},
		{
			name: "text section external",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{textSection},
			want:     "T",
		},
		{
			name: "text section static",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassStatic,
			},
			sections: []*pe.Section{textSection},
			want:     "t",
		},
		{
			name: "data section external",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{dataSection},
			want:     "D",
		},
		{
			name: "data section static",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassStatic,
			},
			sections: []*pe.Section{dataSection},
			want:     "d",
		},
		{
			name: "rodata section external",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{rdataSection},
			want:     "R",
		},
		{
			name: "rodata section static",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassStatic,
			},
			sections: []*pe.Section{rdataSection},
			want:     "r",
		},
		{
			name: "bss section external",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{bssSection},
			want:     "B",
		},
		{
			name: "bss section static",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassStatic,
			},
			sections: []*pe.Section{bssSection},
			want:     "b",
		},
		{
			name: "section without content flags defaults to data",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{plainSection},
			want:     "D",
		},
		{
			name: "second section is selected by 1-based number",
			sym: &pe.Symbol{
				SectionNumber: 2,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{textSection, rdataSection},
			want:     "R",
		},
		{
			name: "section number out of range external",
			sym: &pe.Symbol{
				SectionNumber: 2,
				StorageClass:  peSymClassExternal,
			},
			sections: []*pe.Section{textSection},
			want:     "?",
		},
		{
			// the unknown marker must not be lowercased for local symbols
			name: "section number out of range static",
			sym: &pe.Symbol{
				SectionNumber: 2,
				StorageClass:  peSymClassStatic,
			},
			sections: []*pe.Section{textSection},
			want:     "?",
		},
		{
			name: "defined symbol without any sections",
			sym: &pe.Symbol{
				SectionNumber: 1,
				StorageClass:  peSymClassStatic,
			},
			want: "?",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := peSymbolType(tt.sym, tt.sections)
			assert.Equal(t, tt.want, got)
		})
	}
}