wip: stdlib

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-06-17 10:36:21 -04:00
parent a08457c166
commit e4957568b6
No known key found for this signature in database
7 changed files with 181 additions and 20 deletions

View File

@ -26,10 +26,11 @@ func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger {
// NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler. // NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler.
func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger { func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger {
c := newGoBinaryCataloger(opts)
return generic.NewCataloger(binaryCatalogerName). return generic.NewCataloger(binaryCatalogerName).
WithParserByMimeTypes( WithParserByMimeTypes(
newGoBinaryCataloger(opts).parseGoBinary, c.parseGoBinary,
mimetype.ExecutableMIMETypeSet.List()..., mimetype.ExecutableMIMETypeSet.List()...,
). ).
WithResolvingProcessors(stdlibProcessor) WithResolvingProcessors(c.stdlibProcessor)
} }

View File

@ -58,6 +58,7 @@ func Test_Config(t *testing.T) {
NoProxy: []string{"my.private", "no.proxy"}, NoProxy: []string{"my.private", "no.proxy"},
MainModuleVersion: DefaultMainModuleVersionConfig(), MainModuleVersion: DefaultMainModuleVersionConfig(),
UsePackagesLib: true, UsePackagesLib: true,
CaptureSymbols: true,
}, },
}, },
{ {
@ -86,6 +87,7 @@ func Test_Config(t *testing.T) {
NoProxy: []string{"alt.no.proxy"}, NoProxy: []string{"alt.no.proxy"},
MainModuleVersion: DefaultMainModuleVersionConfig(), MainModuleVersion: DefaultMainModuleVersionConfig(),
UsePackagesLib: true, UsePackagesLib: true,
CaptureSymbols: true,
}, },
}, },
} }

View File

@ -13,6 +13,7 @@ import (
"runtime/debug" "runtime/debug"
"slices" "slices"
"strings" "strings"
"sync"
"time" "time"
"golang.org/x/mod/module" "golang.org/x/mod/module"
@ -50,6 +51,12 @@ type goBinaryCataloger struct {
licenseResolver goLicenseResolver licenseResolver goLicenseResolver
mainModuleVersion MainModuleVersionConfig mainModuleVersion MainModuleVersionConfig
captureSymbols bool captureSymbols bool
// stdlibSymbols holds the standard-library function symbols discovered per binary (keyed by the
// binary's location), populated during parsing and consumed by stdlibProcessor when it builds the
// synthetic "stdlib" package. Guarded by stdlibSymbolsMu because parsers run concurrently.
stdlibSymbols map[file.Coordinates][]string
stdlibSymbolsMu sync.Mutex
} }
func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
@ -57,9 +64,30 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts), licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts),
mainModuleVersion: opts.MainModuleVersion, mainModuleVersion: opts.MainModuleVersion,
captureSymbols: opts.CaptureSymbols, captureSymbols: opts.CaptureSymbols,
stdlibSymbols: make(map[file.Coordinates][]string),
} }
} }
// recordStdlibSymbols merges the standard-library symbols discovered for a binary location into the
// per-location record so the stdlib processor can attach them to the synthetic stdlib package.
//
// The list stored for coord is kept sorted and deduplicated. An empty symbols slice is a no-op.
// The map is only touched while holding stdlibSymbolsMu.
func (c *goBinaryCataloger) recordStdlibSymbols(coord file.Coordinates, symbols []string) {
	if len(symbols) == 0 {
		return
	}

	c.stdlibSymbolsMu.Lock()
	defer c.stdlibSymbolsMu.Unlock()

	// tolerate a cataloger that was constructed as a bare struct: writing to a nil map panics
	if c.stdlibSymbols == nil {
		c.stdlibSymbols = make(map[file.Coordinates][]string)
	}

	// Merge into a fresh slice rather than appending to the stored one, so the in-place sort/compact
	// below can never rewrite a backing array that a reader obtained earlier and is still using
	// without the lock.
	existing := c.stdlibSymbols[coord]
	merged := make([]string, 0, len(existing)+len(symbols))
	merged = append(merged, existing...)
	merged = append(merged, symbols...)
	slices.Sort(merged)
	c.stdlibSymbols[coord] = slices.Compact(merged)
}
// stdlibSymbolsFor returns the standard-library symbols recorded for a binary location, or nil when
// nothing has been recorded for coord.
//
// The result is a copy of the stored list. The stored slice is guarded by stdlibSymbolsMu, and
// returning it directly would let the caller read it after the lock is released while a concurrent
// recordStdlibSymbols call for the same location modifies it.
func (c *goBinaryCataloger) stdlibSymbolsFor(coord file.Coordinates) []string {
	c.stdlibSymbolsMu.Lock()
	defer c.stdlibSymbolsMu.Unlock()
	// slices.Clone keeps a nil input nil, so "nothing recorded" still reads as nil
	return slices.Clone(c.stdlibSymbols[coord])
}
// parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler. // parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []pkg.Package
@ -130,7 +158,8 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
mod.Main = createMainModuleFromPath(mod) mod.Main = createMainModuleFromPath(mod)
} }
symbolsByModule := moduleSymbols(mod.symbols, &mod.Main, mod.Deps) symbolsByModule, stdlibSymbols := moduleSymbols(mod.symbols, &mod.Main, mod.Deps)
c.recordStdlibSymbols(location.Coordinates, stdlibSymbols)
var pkgs []pkg.Package var pkgs []pkg.Package
for _, dep := range mod.Deps { for _, dep := range mod.Deps {

View File

@ -12,12 +12,12 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
func stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { func (c *goBinaryCataloger) stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
compilerPkgs, newRelationships := stdlibPackageAndRelationships(ctx, pkgs) compilerPkgs, newRelationships := c.stdlibPackageAndRelationships(ctx, pkgs)
return append(pkgs, compilerPkgs...), append(relationships, newRelationships...), err return append(pkgs, compilerPkgs...), append(relationships, newRelationships...), err
} }
func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) { func (c *goBinaryCataloger) stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) {
var goCompilerPkgs []pkg.Package var goCompilerPkgs []pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship
totalLocations := file.NewLocationSet() totalLocations := file.NewLocationSet()
@ -33,7 +33,7 @@ func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]p
continue continue
} }
stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations) stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations, c.stdlibSymbolsFor(location.Coordinates))
if stdLibPkg == nil { if stdLibPkg == nil {
continue continue
} }
@ -50,7 +50,7 @@ func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]p
return goCompilerPkgs, relationships return goCompilerPkgs, relationships
} }
func newGoStdLib(ctx context.Context, version string, location file.LocationSet) *pkg.Package { func newGoStdLib(ctx context.Context, version string, location file.LocationSet, symbols []string) *pkg.Package {
stdlibCpe, err := generateStdlibCpe(version) stdlibCpe, err := generateStdlibCpe(version)
if err != nil { if err != nil {
return nil return nil
@ -66,6 +66,7 @@ func newGoStdLib(ctx context.Context, version string, location file.LocationSet)
Type: pkg.GoModulePkg, Type: pkg.GoModulePkg,
Metadata: pkg.GolangBinaryBuildinfoEntry{ Metadata: pkg.GolangBinaryBuildinfoEntry{
GoCompiledVersion: version, GoCompiledVersion: version,
Symbols: symbols,
}, },
} }
goCompilerPkg.SetID() goCompilerPkg.SetID()

View File

@ -88,7 +88,8 @@ func Test_stdlibPackageAndRelationships(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, tt.pkgs) c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)}
gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, tt.pkgs)
assert.Len(t, gotPkgs, tt.wantPkgs) assert.Len(t, gotPkgs, tt.wantPkgs)
assert.Len(t, gotRels, tt.wantRels) assert.Len(t, gotRels, tt.wantRels)
}) })
@ -137,7 +138,8 @@ func Test_stdlibPackageAndRelationships_values(t *testing.T) {
Type: artifact.DependencyOfRelationship, Type: artifact.DependencyOfRelationship,
} }
gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, []pkg.Package{p}) c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)}
gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, []pkg.Package{p})
require.Len(t, gotPkgs, 1) require.Len(t, gotPkgs, 1)
gotPkg := gotPkgs[0] gotPkg := gotPkgs[0]

View File

@ -1,9 +1,11 @@
package golang package golang
import ( import (
"bytes"
"debug/elf" "debug/elf"
"debug/gosym" "debug/gosym"
"debug/macho" "debug/macho"
"encoding/binary"
"fmt" "fmt"
"io" "io"
"runtime/debug" "runtime/debug"
@ -40,19 +42,113 @@ func getSymbols(r io.ReaderAt) (syms []binarySymbol, err error) {
return nil, fmt.Errorf("unable to parse pclntab: %w", err) return nil, fmt.Errorf("unable to parse pclntab: %w", err)
} }
seen := make(map[string]struct{})
for _, fn := range table.Funcs { for _, fn := range table.Funcs {
if fn.Sym == nil { if fn.Sym == nil {
continue continue
} }
seen[fn.Name] = struct{}{}
syms = append(syms, binarySymbol{ syms = append(syms, binarySymbol{
packagePath: fn.PackageName(), packagePath: fn.PackageName(),
name: fn.Name, name: fn.Name,
}) })
} }
// debug/gosym only exposes top-level functions; functions that the compiler inlined into their
// callers are absent from table.Funcs even though their names are recorded in the pclntab funcname
// table (used to reconstruct inlined frames in tracebacks). Recover those names so that a
// vulnerable-but-inlined function (e.g. a small stdlib wrapper) is still reported as present.
for _, name := range funcNameTable(pclntab) {
if _, ok := seen[name]; ok {
continue
}
pkgPath := packagePathFromSymbolName(name)
if pkgPath == "" {
continue
}
seen[name] = struct{}{}
syms = append(syms, binarySymbol{packagePath: pkgPath, name: name})
}
return syms, nil return syms, nil
} }
// packagePathFromSymbolName derives the owning package import path from a fully qualified symbol name.
// The package path is everything up to the first "." that follows the final "/" — e.g.
// "path/filepath.IsLocal" -> "path/filepath" and "golang.org/x/net/html.(*Tokenizer).Next" ->
// "golang.org/x/net/html".
//
// To stay consistent with debug/gosym's Sym.PackageName, two kinds of names get special handling:
//   - a generic instantiation suffix ("pkg.Map[github.com/foo/bar.T]") is removed first, since the
//     type arguments may contain "/" and "." that would otherwise be mistaken for the package path;
//   - names under the reserved "go:" / "type:" prefixes are compiler-generated and belong to no package.
//
// Returns "" for those reserved names and when the name has no package-qualifying dot.
func packagePathFromSymbolName(name string) string {
	if strings.HasPrefix(name, "go:") || strings.HasPrefix(name, "type:") {
		return ""
	}

	// cut "[...]" (first opening bracket through last closing bracket); a name with an unmatched
	// bracket is malformed and is left untouched
	if open := strings.IndexByte(name, '['); open >= 0 {
		if closing := strings.LastIndexByte(name, ']'); closing > open {
			name = name[:open] + name[closing+1:]
		}
	}

	// slash is -1 when there is no "/", which makes slash+1 the start of the name
	slash := strings.LastIndexByte(name, '/')
	dot := strings.IndexByte(name[slash+1:], '.')
	if dot < 0 {
		return ""
	}
	return name[:slash+1+dot]
}
// funcNameTable returns every function name recorded in the pclntab's funcname table, including the
// names of inlined functions that debug/gosym does not expose.
//
// Only the Go 1.16+ header layouts are understood (magic 0xfffffffa for 1.16/1.17, 0xfffffff0 for
// 1.18/1.19, 0xfffffff1 for 1.20+). The header's byte order is detected from the magic, so tables
// from both little- and big-endian targets are read. The table is taken to be the bytes between the
// header's funcnameOffset and cuOffset, split on NUL with empty entries skipped.
//
// The function is fail-soft: a short buffer, an unknown magic, an unsupported pointer size, or
// offsets that are out of bounds or out of order all yield nil, so callers fall back to the
// debug/gosym function set. See the runtime's pcHeader / moduledata layout.
func funcNameTable(pclntab []byte) []string {
	if len(pclntab) < 8 {
		return nil
	}

	// funcnameWord maps a header magic to the index of the funcnameOffset word among the
	// pointer-sized words that follow the 8-byte header prefix:
	// (nfunc, nfiles, [textStart,] funcnameOffset, cuOffset, ...).
	// textStart exists in the 1.18+ headers but not in 1.16/1.17.
	funcnameWord := func(magic uint32) (int, bool) {
		switch magic {
		case 0xfffffff1, 0xfffffff0: // go1.20+, go1.18/1.19
			return 3, true
		case 0xfffffffa: // go1.16/1.17
			return 2, true
		default:
			return 0, false
		}
	}

	// the magic doubles as a byte-order mark: try little-endian first, then big-endian
	var order binary.ByteOrder = binary.LittleEndian
	funcnameIdx, ok := funcnameWord(order.Uint32(pclntab[0:4]))
	if !ok {
		order = binary.BigEndian
		if funcnameIdx, ok = funcnameWord(order.Uint32(pclntab[0:4])); !ok {
			return nil
		}
	}

	ptrSize := int(pclntab[7])
	if ptrSize != 4 && ptrSize != 8 {
		return nil
	}

	// readWord returns the idx-th pointer-sized header word, or false when it lies past the buffer
	readWord := func(idx int) (uint64, bool) {
		off := 8 + idx*ptrSize
		if off+ptrSize > len(pclntab) {
			return 0, false
		}
		if ptrSize == 8 {
			return order.Uint64(pclntab[off : off+8]), true
		}
		return uint64(order.Uint32(pclntab[off : off+4])), true
	}

	funcnameOffset, ok1 := readWord(funcnameIdx)
	cuOffset, ok2 := readWord(funcnameIdx + 1)
	if !ok1 || !ok2 {
		return nil
	}

	// validate while still unsigned: converting an oversized offset to int first could wrap to a
	// value that looks in range
	if funcnameOffset >= cuOffset || cuOffset > uint64(len(pclntab)) {
		return nil
	}
	start, end := int(funcnameOffset), int(cuOffset)

	var names []string
	for _, raw := range bytes.Split(pclntab[start:end], []byte{0}) {
		if len(raw) == 0 {
			continue
		}
		names = append(names, string(raw))
	}
	return names
}
// readPclntab locates the pclntab and the start address of the text segment within the binary. // readPclntab locates the pclntab and the start address of the text segment within the binary.
func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) { func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
ident := make([]byte, 16) ident := make([]byte, 16)
@ -106,11 +202,13 @@ func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
// moduleSymbols attributes each extracted symbol to the module that owns it (by longest module path prefix // moduleSymbols attributes each extracted symbol to the module that owns it (by longest module path prefix
// of the symbol's package path) and returns a sorted, deduplicated list of symbol names per module path. // of the symbol's package path) and returns a sorted, deduplicated list of symbol names per module path.
// Symbols from the "main" package are attributed to the main module. Stdlib and runtime symbols are not // Symbols from the "main" package are attributed to the main module. Standard-library symbols (which belong
// attributed to any module. // to no module) are collected separately and returned as the second value so they can be attached to the
func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) map[string][]string { // synthetic "stdlib" package. Compiler/runtime-internal symbols that are neither module-owned nor a
// recognizable stdlib import path are dropped.
func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) (byModule map[string][]string, stdlib []string) {
if len(symbols) == 0 { if len(symbols) == 0 {
return nil return nil, nil
} }
var modulePaths []string var modulePaths []string
@ -138,6 +236,9 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod
} }
} }
if best == "" { if best == "" {
if pkgPath != "main" && isStandardImportPath(pkgPath) {
stdlib = append(stdlib, sym.name)
}
continue continue
} }
results[best] = append(results[best], sym.name) results[best] = append(results[best], sym.name)
@ -147,6 +248,22 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod
slices.Sort(names) slices.Sort(names)
results[modPath] = slices.Compact(names) results[modPath] = slices.Compact(names)
} }
if len(stdlib) > 0 {
slices.Sort(stdlib)
stdlib = slices.Compact(stdlib)
}
return results return results, stdlib
}
// isStandardImportPath reports whether path is a Go standard-library import path. This mirrors the rule
// the Go toolchain uses: a path is standard if the element before its first slash contains no dot (e.g.
// "net/http", "runtime", "internal/abi"), which distinguishes it from module paths like
// "github.com/foo/bar" whose leading element is a domain name.
func isStandardImportPath(path string) bool {
// a path without any slash is a single element, so the whole path is the leading element
first := path
if i := strings.Index(path, "/"); i >= 0 {
first = path[:i]
}
// an empty leading element (empty path, or a path starting with "/") is never reported as standard
return first != "" && !strings.Contains(first, ".")
} }

View File

@ -22,6 +22,7 @@ func Test_moduleSymbols(t *testing.T) {
name string name string
symbols []binarySymbol symbols []binarySymbol
expected map[string][]string expected map[string][]string
expectedStdlib []string
}{ }{
{ {
name: "no symbols", name: "no symbols",
@ -59,12 +60,18 @@ func Test_moduleSymbols(t *testing.T) {
}, },
}, },
{ {
name: "stdlib and runtime symbols are not attributed", name: "stdlib and runtime symbols are collected separately",
symbols: []binarySymbol{ symbols: []binarySymbol{
{packagePath: "runtime", name: "runtime.main"}, {packagePath: "runtime", name: "runtime.main"},
{packagePath: "net/http", name: "net/http.(*Client).Do"}, {packagePath: "net/http", name: "net/http.(*Client).Do"},
{packagePath: "internal/abi", name: "internal/abi.(*Type).Kind"},
}, },
expected: map[string][]string{}, expected: map[string][]string{},
expectedStdlib: []string{
"internal/abi.(*Type).Kind",
"net/http.(*Client).Do",
"runtime.main",
},
}, },
{ {
name: "duplicate symbols are deduplicated", name: "duplicate symbols are deduplicated",
@ -82,7 +89,9 @@ func Test_moduleSymbols(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.expected, moduleSymbols(tt.symbols, mainModule, deps)) gotByModule, gotStdlib := moduleSymbols(tt.symbols, mainModule, deps)
assert.Equal(t, tt.expected, gotByModule)
assert.Equal(t, tt.expectedStdlib, gotStdlib)
}) })
} }
} }