feat: POC capture lib symbols

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-06-11 12:04:03 -04:00
parent 148fe572bc
commit a08457c166
No known key found for this signature in database
10 changed files with 318 additions and 10 deletions

View File

@ -193,7 +193,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
WithFromBuildSettings(cfg.Golang.MainModuleVersion.FromBuildSettings).
WithFromLDFlags(cfg.Golang.MainModuleVersion.FromLDFlags),
).
WithUsePackagesLib(*multiLevelOption(true, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.UsePackagesLib)),
WithUsePackagesLib(*multiLevelOption(true, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.UsePackagesLib)).
WithCaptureSymbols(cfg.Golang.CaptureSymbols),
JavaScript: javascript.DefaultCatalogerConfig().
WithIncludeDevDependencies(*multiLevelOption(false, cfg.JavaScript.IncludeDevDependencies)).
WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.JavaScript, task.Node, task.NPM), cfg.JavaScript.SearchRemoteLicenses)).

View File

@ -17,6 +17,7 @@ type golangConfig struct {
NoProxy string `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"`
MainModuleVersion golangMainModuleVersionConfig `json:"main-module-version" yaml:"main-module-version" mapstructure:"main-module-version"`
UsePackagesLib *bool `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"`
CaptureSymbols bool `json:"capture-symbols" yaml:"capture-symbols" mapstructure:"capture-symbols"`
}
var _ interface {
@ -39,8 +40,9 @@ if unset this defaults to $GONOPROXY`)
always show (devel) as the version. Use these options to control heuristics to guess
a more accurate version from the binary.`)
descriptions.Add(&o.UsePackagesLib, `use the golang.org/x/tools/go/packages library, which executes golang tooling found on the path in addition to potential network access to get the most accurate results`)
descriptions.Add(&o.CaptureSymbols, `capture function symbols from the binary symbol table (pclntab) and attribute them to the owning module`)
descriptions.Add(&o.MainModuleVersion.FromLDFlags, `look for LD flags that appear to be setting a version (e.g. -X main.version=1.0.0)`)
descriptions.Add(&o.MainModuleVersion.FromBuildSettings, `use the build settings (e.g. vcs.version & vcs.time) to craft a v0 pseudo version
descriptions.Add(&o.MainModuleVersion.FromBuildSettings, `use the build settings (e.g. vcs.version & vcs.time) to craft a v0 pseudo version
(e.g. v0.0.0-20220308212642-53e6d0aaf6fb) when a more accurate version cannot be found otherwise`)
descriptions.Add(&o.MainModuleVersion.FromContents, `search for semver-like strings in the binary contents`)
}
@ -67,5 +69,6 @@ func defaultGolangConfig() golangConfig {
FromBuildSettings: def.MainModuleVersion.FromBuildSettings,
},
UsePackagesLib: nil, // this defaults to true, which is the API default
CaptureSymbols: def.CaptureSymbols,
}
}

View File

@ -3,16 +3,16 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.1.7"
JSONSchemaVersion = "16.1.8"
// Changelog
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field)
// 16.1.2 - placeholder for 16.1.2 changelog
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
// 16.1.4 - add BunLockEntry metadata type for bun.lock support
// 16.1.5 - add DenoLockEntry and DenoRemoteLockEntry metadata types for deno.lock support
// 16.1.6 - add Dependencies to ElixirMixLockEntry metadata
// 16.1.7 - add AppleAppBundleEntry metadata type for the apple app bundle cataloger
// 16.1.8 - add Symbols to GolangBinaryBuildinfoEntry metadata
)

View File

@ -49,6 +49,10 @@ type CatalogerConfig struct {
MainModuleVersion MainModuleVersionConfig `yaml:"main-module-version" json:"main-module-version" mapstructure:"main-module-version"`
// CaptureSymbols enables extracting function symbols from the binary symbol table (pclntab) and attributing them to the owning module.
// app-config: golang.capture-symbols
CaptureSymbols bool `yaml:"capture-symbols" json:"capture-symbols" mapstructure:"capture-symbols"`
// Whether to use the golang.org/x/tools/go/packages, which executes golang tooling found on the path in addition to potential network access
UsePackagesLib bool `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"`
}
@ -76,6 +80,7 @@ func DefaultCatalogerConfig() CatalogerConfig {
UsePackagesLib: true,
MainModuleVersion: DefaultMainModuleVersionConfig(),
LocalModCacheDir: defaultGoModDir(),
CaptureSymbols: true,
}
// first process the proxy settings
@ -184,6 +189,11 @@ func (g CatalogerConfig) WithMainModuleVersion(input MainModuleVersionConfig) Ca
return g
}
// WithCaptureSymbols returns a copy of the config with CaptureSymbols set to input.
// The receiver is passed by value, so the config the method was called on is left unmodified.
func (g CatalogerConfig) WithCaptureSymbols(input bool) CatalogerConfig {
g.CaptureSymbols = input
return g
}
func (g CatalogerConfig) WithUsePackagesLib(useLib bool) CatalogerConfig {
g.UsePackagesLib = useLib
return g

View File

@ -45,7 +45,7 @@ func (c *goBinaryCataloger) newGoBinaryPackage(dep *debug.Module, m pkg.GolangBi
return p
}
func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments []string) pkg.GolangBinaryBuildinfoEntry {
func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments, symbols []string) pkg.GolangBinaryBuildinfoEntry {
if dep.Replace != nil {
dep = dep.Replace
}
@ -58,6 +58,7 @@ func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture st
MainModule: mainModule,
GoCryptoSettings: cryptoSettings,
GoExperiments: experiments,
Symbols: symbols,
}
}

View File

@ -49,12 +49,14 @@ const devel = "(devel)"
// goBinaryCataloger holds the settings used while cataloging go binaries.
type goBinaryCataloger struct {
// licenseResolver is queried for the licenses of a module (by module path and version).
licenseResolver goLicenseResolver
// mainModuleVersion is copied from CatalogerConfig.MainModuleVersion when the cataloger is created.
mainModuleVersion MainModuleVersionConfig
// captureSymbols is forwarded to scanFile to toggle symbol extraction for each scanned binary.
captureSymbols bool
}
// newGoBinaryCataloger builds a goBinaryCataloger from the given cataloger options: the license resolver is
// created for the binary cataloger name, while the main module version settings and the symbol capture flag
// are copied straight from opts.
func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
	cataloger := new(goBinaryCataloger)
	cataloger.licenseResolver = newGoLicenseResolver(binaryCatalogerName, opts)
	cataloger.mainModuleVersion = opts.MainModuleVersion
	cataloger.captureSymbols = opts.CaptureSymbols
	return cataloger
}
@ -68,7 +70,7 @@ func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Res
}
defer internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
mods, errs := scanFile(reader.Location, unionReader)
mods, errs := scanFile(reader.Location, unionReader, c.captureSymbols)
var rels []artifact.Relationship
for _, mod := range mods {
@ -128,6 +130,8 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
mod.Main = createMainModuleFromPath(mod)
}
symbolsByModule := moduleSymbols(mod.symbols, &mod.Main, mod.Deps)
var pkgs []pkg.Package
for _, dep := range mod.Deps {
if dep == nil {
@ -147,6 +151,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
nil,
mod.cryptoSettings,
experiments,
symbolsByModule[dep.Path],
)
p := c.newGoBinaryPackage(
@ -164,7 +169,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
return nil, pkgs
}
main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader)
main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader, symbolsByModule[mod.Main.Path])
return &main, pkgs
}
@ -179,7 +184,7 @@ func missingMainModule(mod *extendedBuildInfo) bool {
return mod.Main == moduleFromPartialPackageBuild
}
func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser, symbols []string) pkg.Package {
gbs := getBuildSettings(mod.Settings)
lics := c.licenseResolver.getLicenses(ctx, resolver, mod.Main.Path, mod.Main.Version)
gover, experiments := getExperimentsFromVersion(mod.GoVersion)
@ -192,6 +197,7 @@ func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file
gbs,
mod.cryptoSettings,
experiments,
symbols,
)
if mod.Main.Version == devel {

View File

@ -18,10 +18,11 @@ type extendedBuildInfo struct {
*debug.BuildInfo
cryptoSettings []string
arch string
symbols []binarySymbol
}
// scanFile scans file to try to report the Go and module versions.
func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extendedBuildInfo, error) {
func scanFile(location file.Location, reader unionreader.UnionReader, captureSymbols bool) ([]*extendedBuildInfo, error) {
// NOTE: multiple readers are returned to cover universal binaries, which are files
// with more than one binary
readers, errs := unionreader.GetReaders(reader)
@ -61,7 +62,18 @@ func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extend
}
}
builds = append(builds, &extendedBuildInfo{BuildInfo: bi, cryptoSettings: v, arch: arch})
var symbols []binarySymbol
if captureSymbols {
symbols, err = getSymbols(r)
if err != nil {
log.WithFields("file", location.RealPath, "error", err).Trace("unable to read golang symbol info")
// don't skip this build info.
// we can still catalog packages, even if we can't get the symbol information
errs = unknown.Appendf(errs, location, "unable to read golang symbol info: %w", err)
}
}
builds = append(builds, &extendedBuildInfo{BuildInfo: bi, cryptoSettings: v, arch: arch, symbols: symbols})
}
return builds, errs
}

View File

@ -0,0 +1,152 @@
package golang
import (
	"debug/elf"
	"debug/gosym"
	"debug/macho"
	"fmt"
	"io"
	"net/url"
	"runtime/debug"
	"slices"
	"strings"
)
// binarySymbol represents a single function symbol extracted from a go binary's pclntab.
// Values are produced by getSymbols and grouped by owning module in moduleSymbols.
type binarySymbol struct {
// packagePath is the import path of the package that owns the symbol (e.g. "github.com/foo/bar/internal/baz").
// It is populated from gosym's PackageName(), i.e. it is exactly what the symbol table reports.
// NOTE(review): the linker may percent-escape dots in the final path element (e.g. "gopkg.in/yaml%2ev3") —
// confirm before comparing this value against module paths.
packagePath string
// name is the fully qualified symbol name (e.g. "github.com/foo/bar/internal/baz.(*Type).Method")
name string
}
// getSymbols extracts all function symbols from the pclntab of a go binary. The pclntab is required by the
// go runtime (for panic tracebacks and GC), so it is present even in binaries built with -ldflags="-s -w".
//
// r must give random access to the binary image. On success one binarySymbol is returned for every function
// entry that carries symbol information. On failure (the pclntab cannot be located or parsed, or a panic is
// raised while decoding it) the returned slice is always nil, so callers never observe a partially populated
// result next to a non-nil error.
func getSymbols(r io.ReaderAt) (syms []binarySymbol, err error) {
	defer func() {
		if recovered := recover(); recovered != nil {
			// the gosym package can panic on malformed pclntab data; drop whatever was collected so far
			syms = nil
			err = fmt.Errorf("recovered from panic while reading pclntab: %v", recovered)
		}
	}()

	pclntab, textStart, err := readPclntab(r)
	if err != nil {
		return nil, err
	}

	// no symtab data is supplied (nil): the table is built solely from the line table derived from the pclntab
	table, err := gosym.NewTable(nil, gosym.NewLineTable(pclntab, textStart))
	if err != nil {
		return nil, fmt.Errorf("unable to parse pclntab: %w", err)
	}

	syms = make([]binarySymbol, 0, len(table.Funcs))
	for _, fn := range table.Funcs {
		if fn.Sym == nil {
			// Name and PackageName are promoted from the embedded *Sym, so there is nothing to record without it
			continue
		}
		syms = append(syms, binarySymbol{
			packagePath: fn.PackageName(),
			name:        fn.Name,
		})
	}
	return syms, nil
}
// readPclntab locates the pclntab and the start address of the text segment within the binary.
//
// The container format is detected from the first 16 bytes of r:
//   - ELF: the pclntab is read from the ".gopclntab" section, falling back to ".data.rel.ro.gopclntab"
//     (the name used when the linker places the table in relro data, e.g. PIE builds); the text start is
//     the address of the ".text" section.
//   - Mach-O: the pclntab is read from the "__gopclntab" section and the text start is the address of the
//     "__text" section.
//
// Any other format, or a reader too short to hold the identification bytes, yields errUnrecognizedFormat.
// A recognized binary that lacks one of the required sections yields a descriptive error.
func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
	ident := make([]byte, 16)
	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
		return nil, 0, errUnrecognizedFormat
	}

	switch {
	case strings.HasPrefix(string(ident), "\x7FELF"):
		f, err := elf.NewFile(r)
		if err != nil {
			return nil, 0, fmt.Errorf("unable to parse ELF binary: %w", err)
		}
		sect := f.Section(".gopclntab")
		if sect == nil {
			// PIE (and other relro) builds emit the table under the relro section prefix
			sect = f.Section(".data.rel.ro.gopclntab")
		}
		if sect == nil {
			return nil, 0, fmt.Errorf("no .gopclntab section found")
		}
		data, err := sect.Data()
		if err != nil {
			return nil, 0, fmt.Errorf("unable to read .gopclntab section: %w", err)
		}
		text := f.Section(".text")
		if text == nil {
			return nil, 0, fmt.Errorf("no .text section found")
		}
		return data, text.Addr, nil

	// big-endian Mach-O magic at offset 0, or little-endian magic (whose first byte varies between 32 and 64 bit)
	case strings.HasPrefix(string(ident), "\xFE\xED\xFA") || strings.HasPrefix(string(ident[1:]), "\xFA\xED\xFE"):
		f, err := macho.NewFile(r)
		if err != nil {
			return nil, 0, fmt.Errorf("unable to parse Mach-O binary: %w", err)
		}
		sect := f.Section("__gopclntab")
		if sect == nil {
			return nil, 0, fmt.Errorf("no __gopclntab section found")
		}
		data, err := sect.Data()
		if err != nil {
			return nil, 0, fmt.Errorf("unable to read __gopclntab section: %w", err)
		}
		text := f.Section("__text")
		if text == nil {
			return nil, 0, fmt.Errorf("no __text section found")
		}
		return data, text.Addr, nil
	}

	// note: PE and XCOFF binaries do not place the pclntab in a dedicated section; locating it requires
	// walking the symbol table for runtime.pclntab markers, which is not yet supported here
	return nil, 0, errUnrecognizedFormat
}
// moduleSymbols attributes each extracted symbol to the module that owns it (by longest module path prefix
// of the symbol's package path) and returns a sorted, deduplicated list of symbol names per module path.
// Symbols from the "main" package are attributed to the main module. Stdlib and runtime symbols are not
// attributed to any module.
//
// The linker percent-escapes certain bytes of a package path when forming symbol names (most notably dots in
// the final path element, so "gopkg.in/yaml.v3" shows up as "gopkg.in/yaml%2ev3"). Package paths are therefore
// unescaped before being compared against module paths; the symbol names themselves are returned exactly as
// they were extracted.
//
// nil entries in deps and modules with an empty path are ignored. A nil map is returned when there are no
// symbols at all; otherwise the map holds only the modules that own at least one symbol.
func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) map[string][]string {
	if len(symbols) == 0 {
		return nil
	}

	modulePaths := make([]string, 0, len(deps)+1)
	if main != nil && main.Path != "" {
		modulePaths = append(modulePaths, main.Path)
	}
	for _, dep := range deps {
		if dep != nil && dep.Path != "" {
			modulePaths = append(modulePaths, dep.Path)
		}
	}

	// resolve returns the path of the module owning the given raw package path, or "" when no module does
	resolve := func(rawPkgPath string) string {
		pkgPath := rawPkgPath
		if strings.Contains(pkgPath, "%") {
			// a path that cannot be decoded is matched as-is rather than dropped
			if decoded, err := url.PathUnescape(pkgPath); err == nil {
				pkgPath = decoded
			}
		}
		if pkgPath == "main" && main != nil {
			// the linker renames the main package's import path to "main"
			pkgPath = main.Path
		}

		var best string
		for _, modPath := range modulePaths {
			if len(modPath) > len(best) && (pkgPath == modPath || strings.HasPrefix(pkgPath, modPath+"/")) {
				best = modPath
			}
		}
		return best
	}

	// many symbols share a package, so each distinct package path is resolved only once
	owners := make(map[string]string)
	results := make(map[string][]string)
	for _, sym := range symbols {
		owner, seen := owners[sym.packagePath]
		if !seen {
			owner = resolve(sym.packagePath)
			owners[sym.packagePath] = owner
		}
		if owner == "" {
			continue
		}
		results[owner] = append(results[owner], sym.name)
	}

	for modPath, names := range results {
		slices.Sort(names)
		results[modPath] = slices.Compact(names)
	}
	return results
}

View File

@ -0,0 +1,118 @@
package golang
import (
"os"
"runtime"
"runtime/debug"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Test_moduleSymbols verifies how extracted symbols are grouped by owning module: longest-prefix matching,
// handling of the "main" package, exclusion of stdlib symbols, and deduplication.
func Test_moduleSymbols(t *testing.T) {
	// fixtures shared by every case: one main module plus two dependencies with overlapping paths
	// (the nil entry checks that missing dependencies are tolerated)
	mainMod := &debug.Module{Path: "github.com/someorg/somecli"}
	dependencies := []*debug.Module{
		{Path: "github.com/foo/bar"},
		{Path: "github.com/foo/bar/v2"},
		nil,
	}

	// sym builds a symbol whose fully qualified name is "<pkg>.<fn>"
	sym := func(pkg, fn string) binarySymbol {
		return binarySymbol{packagePath: pkg, name: pkg + "." + fn}
	}

	type testCase struct {
		name    string
		symbols []binarySymbol
		want    map[string][]string
	}

	cases := []testCase{
		{
			name:    "no symbols",
			symbols: nil,
			want:    nil,
		},
		{
			name: "attribute symbols by longest module path prefix",
			symbols: []binarySymbol{
				sym("github.com/foo/bar", "Parse"),
				sym("github.com/foo/bar/internal/util", "(*Helper).Do"),
				sym("github.com/foo/bar/v2", "Parse"),
			},
			want: map[string][]string{
				"github.com/foo/bar": {
					"github.com/foo/bar.Parse",
					"github.com/foo/bar/internal/util.(*Helper).Do",
				},
				"github.com/foo/bar/v2": {
					"github.com/foo/bar/v2.Parse",
				},
			},
		},
		{
			name: "main package symbols are attributed to the main module",
			symbols: []binarySymbol{
				sym("main", "main"),
				sym("github.com/someorg/somecli/cmd", "Execute"),
			},
			want: map[string][]string{
				"github.com/someorg/somecli": {
					"github.com/someorg/somecli/cmd.Execute",
					"main.main",
				},
			},
		},
		{
			name: "stdlib and runtime symbols are not attributed",
			symbols: []binarySymbol{
				sym("runtime", "main"),
				sym("net/http", "(*Client).Do"),
			},
			want: map[string][]string{},
		},
		{
			name: "duplicate symbols are deduplicated",
			symbols: []binarySymbol{
				sym("github.com/foo/bar", "Parse"),
				sym("github.com/foo/bar", "Parse"),
			},
			want: map[string][]string{
				"github.com/foo/bar": {
					"github.com/foo/bar.Parse",
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := moduleSymbols(tc.symbols, mainMod, dependencies)
			assert.Equal(t, tc.want, got)
		})
	}
}
// Test_getSymbols extracts symbols from the running test executable and checks that well-known functions
// which are always linked into a test binary are reported with the expected package path and name.
func Test_getSymbols(t *testing.T) {
	// symbol extraction only understands ELF and Mach-O images (see readPclntab), so skip on every platform
	// whose test executable uses a different container format (PE, XCOFF, Plan 9 a.out, WebAssembly)
	switch runtime.GOOS {
	case "windows", "aix", "plan9", "js", "wasip1":
		t.Skipf("%s executables are not supported for symbol extraction", runtime.GOOS)
	}

	// the test executable is itself a go binary with a pclntab, which makes for a hermetic fixture
	exe, err := os.Executable()
	require.NoError(t, err)

	f, err := os.Open(exe)
	require.NoError(t, err)
	defer f.Close()

	symbols, err := getSymbols(f)
	require.NoError(t, err)
	require.NotEmpty(t, symbols)

	var foundRuntime, foundTesting bool
	for _, sym := range symbols {
		switch {
		case sym.packagePath == "runtime" && sym.name == "runtime.main":
			foundRuntime = true
		case sym.packagePath == "testing" && sym.name == "testing.tRunner":
			foundTesting = true
		}
	}
	assert.True(t, foundRuntime, "expected to find runtime.main symbol")
	assert.True(t, foundTesting, "expected to find testing.tRunner symbol")
}

View File

@ -22,6 +22,11 @@ type GolangBinaryBuildinfoEntry struct {
// GoExperiments lists experimental Go features enabled during compilation (e.g., "arenas", "cgocheck2").
GoExperiments []string `json:"goExperiments,omitempty" cyclonedx:"goExperiments"`
// Symbols are the fully qualified function symbols from this module that are compiled into the binary
// (e.g., "github.com/foo/bar.(*Type).Method"), extracted from the binary symbol table (pclntab).
// Only captured when the golang cataloger is configured to capture symbols.
Symbols []string `json:"symbols,omitempty"`
}
// GolangModuleEntry represents all captured data for a Golang source scan with go.mod/go.sum