diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index 1b74df358..9a700aff6 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -193,7 +193,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { WithFromBuildSettings(cfg.Golang.MainModuleVersion.FromBuildSettings). WithFromLDFlags(cfg.Golang.MainModuleVersion.FromLDFlags), ). - WithUsePackagesLib(*multiLevelOption(true, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.UsePackagesLib)), + WithUsePackagesLib(*multiLevelOption(true, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.UsePackagesLib)). + WithCaptureSymbols(cfg.Golang.CaptureSymbols), JavaScript: javascript.DefaultCatalogerConfig(). WithIncludeDevDependencies(*multiLevelOption(false, cfg.JavaScript.IncludeDevDependencies)). WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.JavaScript, task.Node, task.NPM), cfg.JavaScript.SearchRemoteLicenses)). diff --git a/cmd/syft/internal/options/golang.go b/cmd/syft/internal/options/golang.go index 10539d79c..60b6a0a48 100644 --- a/cmd/syft/internal/options/golang.go +++ b/cmd/syft/internal/options/golang.go @@ -17,6 +17,7 @@ type golangConfig struct { NoProxy string `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"` MainModuleVersion golangMainModuleVersionConfig `json:"main-module-version" yaml:"main-module-version" mapstructure:"main-module-version"` UsePackagesLib *bool `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"` + CaptureSymbols bool `json:"capture-symbols" yaml:"capture-symbols" mapstructure:"capture-symbols"` } var _ interface { @@ -39,8 +40,9 @@ if unset this defaults to $GONOPROXY`) always show (devel) as the version. 
Use these options to control heuristics to guess a more accurate version from the binary.`) descriptions.Add(&o.UsePackagesLib, `use the golang.org/x/tools/go/packages library, which executes golang tooling found on the path in addition to potential network access to get the most accurate results`) + descriptions.Add(&o.CaptureSymbols, `capture function symbols from the binary symbol table (pclntab) and attribute them to the owning module`) descriptions.Add(&o.MainModuleVersion.FromLDFlags, `look for LD flags that appear to be setting a version (e.g. -X main.version=1.0.0)`) - descriptions.Add(&o.MainModuleVersion.FromBuildSettings, `use the build settings (e.g. vcs.version & vcs.time) to craft a v0 pseudo version + descriptions.Add(&o.MainModuleVersion.FromBuildSettings, `use the build settings (e.g. vcs.version & vcs.time) to craft a v0 pseudo version (e.g. v0.0.0-20220308212642-53e6d0aaf6fb) when a more accurate version cannot be found otherwise`) descriptions.Add(&o.MainModuleVersion.FromContents, `search for semver-like strings in the binary contents`) } @@ -67,5 +69,6 @@ func defaultGolangConfig() golangConfig { FromBuildSettings: def.MainModuleVersion.FromBuildSettings, }, UsePackagesLib: nil, // this defaults to true, which is the API default + CaptureSymbols: def.CaptureSymbols, } } diff --git a/internal/constants.go b/internal/constants.go index 3f27e106c..21046590c 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -3,16 +3,17 @@ package internal const ( // JSONSchemaVersion is the current schema version output by the JSON encoder // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. - JSONSchemaVersion = "16.1.7" + JSONSchemaVersion = "16.1.8" // Changelog // 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field). 
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field) // 16.1.2 - placeholder for 16.1.2 changelog // 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata // 16.1.4 - add BunLockEntry metadata type for bun.lock support // 16.1.5 - add DenoLockEntry and DenoRemoteLockEntry metadata types for deno.lock support // 16.1.6 - add Dependencies to ElixirMixLockEntry metadata // 16.1.7 - add AppleAppBundleEntry metadata type for the apple app bundle cataloger + // 16.1.8 - add Symbols to GolangBinaryBuildinfoEntry metadata ) diff --git a/syft/pkg/cataloger/golang/config.go b/syft/pkg/cataloger/golang/config.go index c90767275..1558f183b 100644 --- a/syft/pkg/cataloger/golang/config.go +++ b/syft/pkg/cataloger/golang/config.go @@ -49,6 +49,10 @@ type CatalogerConfig struct { MainModuleVersion MainModuleVersionConfig `yaml:"main-module-version" json:"main-module-version" mapstructure:"main-module-version"` + // CaptureSymbols enables extracting function symbols from the binary symbol table (pclntab) and attributing them to the owning module. 
+ // app-config: golang.capture-symbols + CaptureSymbols bool `yaml:"capture-symbols" json:"capture-symbols" mapstructure:"capture-symbols"` + // Whether to use the golang.org/x/tools/go/packages, which executes golang tooling found on the path in addition to potential network access UsePackagesLib bool `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"` } @@ -76,6 +80,7 @@ func DefaultCatalogerConfig() CatalogerConfig { UsePackagesLib: true, MainModuleVersion: DefaultMainModuleVersionConfig(), LocalModCacheDir: defaultGoModDir(), + CaptureSymbols: true, // NOTE(review): on by default, so every function symbol of every go binary is emitted unless disabled; GolangBinaryBuildinfoEntry.Symbols is documented as "only captured when ... configured to capture symbols" - confirm that default-on is intended } // first process the proxy settings @@ -184,6 +189,11 @@ func (g CatalogerConfig) WithMainModuleVersion(input MainModuleVersionConfig) Ca return g } +func (g CatalogerConfig) WithCaptureSymbols(input bool) CatalogerConfig { + g.CaptureSymbols = input + return g +} + func (g CatalogerConfig) WithUsePackagesLib(useLib bool) CatalogerConfig { g.UsePackagesLib = useLib return g diff --git a/syft/pkg/cataloger/golang/package.go b/syft/pkg/cataloger/golang/package.go index f689c1a40..4e1e39a80 100644 --- a/syft/pkg/cataloger/golang/package.go +++ b/syft/pkg/cataloger/golang/package.go @@ -45,7 +45,7 @@ func (c *goBinaryCataloger) newGoBinaryPackage(dep *debug.Module, m pkg.GolangBi return p } -func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments []string) pkg.GolangBinaryBuildinfoEntry { +func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture string, buildSettings pkg.KeyValues, cryptoSettings, experiments, symbols []string) pkg.GolangBinaryBuildinfoEntry { if dep.Replace != nil { dep = dep.Replace } @@ -58,6 +58,7 @@ func newBinaryMetadata(dep *debug.Module, mainModule, goVersion, architecture st MainModule: mainModule, GoCryptoSettings: cryptoSettings, GoExperiments: experiments, + Symbols: symbols, } } diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go 
b/syft/pkg/cataloger/golang/parse_go_binary.go index 9cb25337f..c1ade2e35 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -49,12 +49,14 @@ const devel = "(devel)" type goBinaryCataloger struct { licenseResolver goLicenseResolver mainModuleVersion MainModuleVersionConfig + captureSymbols bool } func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { return &goBinaryCataloger{ licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts), mainModuleVersion: opts.MainModuleVersion, + captureSymbols: opts.CaptureSymbols, } } @@ -68,7 +70,7 @@ func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Res } defer internal.CloseAndLogError(reader.ReadCloser, reader.RealPath) - mods, errs := scanFile(reader.Location, unionReader) + mods, errs := scanFile(reader.Location, unionReader, c.captureSymbols) var rels []artifact.Relationship for _, mod := range mods { @@ -128,6 +130,8 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re mod.Main = createMainModuleFromPath(mod) } + symbolsByModule := moduleSymbols(mod.symbols, &mod.Main, mod.Deps) + var pkgs []pkg.Package for _, dep := range mod.Deps { if dep == nil { @@ -147,6 +151,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re nil, mod.cryptoSettings, experiments, + symbolsByModule[dep.Path], ) p := c.newGoBinaryPackage( @@ -164,7 +169,7 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re return nil, pkgs } - main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader) + main := c.makeGoMainPackage(ctx, resolver, mod, arch, location, reader, symbolsByModule[mod.Main.Path]) return &main, pkgs } @@ -179,7 +184,7 @@ func missingMainModule(mod *extendedBuildInfo) bool { return mod.Main == moduleFromPartialPackageBuild } -func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod 
*extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package { +func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser, symbols []string) pkg.Package { gbs := getBuildSettings(mod.Settings) lics := c.licenseResolver.getLicenses(ctx, resolver, mod.Main.Path, mod.Main.Version) gover, experiments := getExperimentsFromVersion(mod.GoVersion) @@ -192,6 +197,7 @@ func (c *goBinaryCataloger) makeGoMainPackage(ctx context.Context, resolver file gbs, mod.cryptoSettings, experiments, + symbols, ) if mod.Main.Version == devel { diff --git a/syft/pkg/cataloger/golang/scan_binary.go b/syft/pkg/cataloger/golang/scan_binary.go index cc41d7190..84fd6d2b2 100644 --- a/syft/pkg/cataloger/golang/scan_binary.go +++ b/syft/pkg/cataloger/golang/scan_binary.go @@ -18,10 +18,11 @@ type extendedBuildInfo struct { *debug.BuildInfo cryptoSettings []string arch string + symbols []binarySymbol } // scanFile scans file to try to report the Go and module versions. -func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extendedBuildInfo, error) { +func scanFile(location file.Location, reader unionreader.UnionReader, captureSymbols bool) ([]*extendedBuildInfo, error) { // NOTE: multiple readers are returned to cover universal binaries, which are files // with more than one binary readers, errs := unionreader.GetReaders(reader) @@ -61,7 +62,18 @@ func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extend } } - builds = append(builds, &extendedBuildInfo{BuildInfo: bi, cryptoSettings: v, arch: arch}) + var symbols []binarySymbol + if captureSymbols { + symbols, err = getSymbols(r) + if err != nil { + log.WithFields("file", location.RealPath, "error", err).Trace("unable to read golang symbol info") + // don't skip this build info. 
+				// we can still catalog packages, even if we can't get the symbol information
+				errs = unknown.Appendf(errs, location, "unable to read golang symbol info: %w", err)
+			}
+		}
+
+		builds = append(builds, &extendedBuildInfo{BuildInfo: bi, cryptoSettings: v, arch: arch, symbols: symbols})
 	}
 	return builds, errs
 }
diff --git a/syft/pkg/cataloger/golang/symbols.go b/syft/pkg/cataloger/golang/symbols.go
new file mode 100644
index 000000000..39353151c
--- /dev/null
+++ b/syft/pkg/cataloger/golang/symbols.go
@@ -0,0 +1,175 @@
+package golang
+
+import (
+	"debug/elf"
+	"debug/gosym"
+	"debug/macho"
+	"fmt"
+	"io"
+	"net/url"
+	"runtime/debug"
+	"slices"
+	"strings"
+)
+
+// binarySymbol represents a single function symbol extracted from a go binary's pclntab.
+type binarySymbol struct {
+	// packagePath is the import path of the package that owns the symbol (e.g. "github.com/foo/bar/internal/baz"),
+	// spelled exactly as it appears within the symbol name (so it may still carry linker escaping, e.g. "%2e" for ".")
+	packagePath string
+
+	// name is the fully qualified symbol name (e.g. "github.com/foo/bar/internal/baz.(*Type).Method")
+	name string
+}
+
+// getSymbols extracts all function symbols from the pclntab of a go binary. The pclntab is required by the
+// go runtime (for panic tracebacks and GC), so it is present even in binaries built with -ldflags="-s -w".
+// On failure (including a recovered panic) no symbols are returned alongside the error.
+func getSymbols(r io.ReaderAt) (syms []binarySymbol, err error) {
+	defer func() {
+		if r := recover(); r != nil {
+			// the gosym package can panic on malformed pclntab data; drop anything collected so far so that
+			// callers never see a partial symbol list next to an error
+			syms = nil
+			err = fmt.Errorf("recovered from panic while reading pclntab: %v", r)
+		}
+	}()
+
+	pclntab, textStart, err := readPclntab(r)
+	if err != nil {
+		return nil, err
+	}
+
+	table, err := gosym.NewTable(nil, gosym.NewLineTable(pclntab, textStart))
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse pclntab: %w", err)
+	}
+
+	for _, fn := range table.Funcs {
+		if fn.Sym == nil {
+			continue
+		}
+		syms = append(syms, binarySymbol{
+			packagePath: fn.PackageName(),
+			name:        fn.Name,
+		})
+	}
+
+	return syms, nil
+}
+
+// readPclntab locates the pclntab and the start address of the text segment within the binary.
+func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
+	ident := make([]byte, 16)
+	if n, err := r.ReadAt(ident, 0); n < len(ident) || err != nil {
+		return nil, 0, errUnrecognizedFormat
+	}
+
+	switch {
+	case strings.HasPrefix(string(ident), "\x7FELF"):
+		f, err := elf.NewFile(r)
+		if err != nil {
+			return nil, 0, fmt.Errorf("unable to parse ELF binary: %w", err)
+		}
+		sect := f.Section(".gopclntab")
+		if sect == nil {
+			return nil, 0, fmt.Errorf("no .gopclntab section found")
+		}
+		pclntab, err := sect.Data()
+		if err != nil {
+			return nil, 0, fmt.Errorf("unable to read .gopclntab section: %w", err)
+		}
+		text := f.Section(".text")
+		if text == nil {
+			return nil, 0, fmt.Errorf("no .text section found")
+		}
+		return pclntab, text.Addr, nil
+	case strings.HasPrefix(string(ident), "\xFE\xED\xFA") || strings.HasPrefix(string(ident[1:]), "\xFA\xED\xFE"):
+		f, err := macho.NewFile(r)
+		if err != nil {
+			return nil, 0, fmt.Errorf("unable to parse Mach-O binary: %w", err)
+		}
+		sect := f.Section("__gopclntab")
+		if sect == nil {
+			return nil, 0, fmt.Errorf("no __gopclntab section found")
+		}
+		pclntab, err := sect.Data()
+		if err != nil {
+			return nil, 0, fmt.Errorf("unable to read __gopclntab section: %w", err)
+		}
+		text := f.Section("__text")
+		if text == nil {
+			return nil, 0, fmt.Errorf("no __text section found")
+		}
+		return pclntab, text.Addr, nil
+	}
+
+	// note: PE and XCOFF binaries do not place the pclntab in a dedicated section; locating it requires
+	// walking the symbol table for runtime.pclntab markers, which is not yet supported here
+	return nil, 0, errUnrecognizedFormat
+}
+
+// moduleSymbols attributes each extracted symbol to the module that owns it (by longest module path prefix
+// of the symbol's package path) and returns a sorted, deduplicated list of symbol names per module path.
+// Symbols from the "main" package are attributed to the main module. Stdlib and runtime symbols are not
+// attributed to any module. Package paths are unescaped before matching (the linker writes "gopkg.in/yaml.v3"
+// as "gopkg.in/yaml%2ev3"), while the reported symbol names are kept exactly as found in the binary.
+func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) map[string][]string {
+	if len(symbols) == 0 {
+		return nil
+	}
+
+	var modulePaths []string
+	if main != nil && main.Path != "" {
+		modulePaths = append(modulePaths, main.Path)
+	}
+	for _, dep := range deps {
+		if dep != nil && dep.Path != "" {
+			modulePaths = append(modulePaths, dep.Path)
+		}
+	}
+
+	results := make(map[string][]string)
+	for _, sym := range symbols {
+		// module paths come from the build info unescaped, so the linker escaping within the symbol's package
+		// path must be undone first, otherwise modules such as "gopkg.in/yaml.v3" never match their own symbols
+		pkgPath := unescapeImportPath(sym.packagePath)
+		if pkgPath == "main" && main != nil {
+			// the linker renames the main package's import path to "main"
+			pkgPath = main.Path
+		}
+
+		var best string
+		for _, modPath := range modulePaths {
+			if len(modPath) > len(best) && (pkgPath == modPath || strings.HasPrefix(pkgPath, modPath+"/")) {
+				best = modPath
+			}
+		}
+		if best == "" {
+			continue
+		}
+		results[best] = append(results[best], sym.name)
+	}
+
+	for modPath, names := range results {
+		slices.Sort(names)
+		results[modPath] = slices.Compact(names)
+	}
+
+	return results
+}
+
+// unescapeImportPath reverses the %xx escaping that the go linker applies to import paths when forming symbol
+// names (most notably "." within the final path element, e.g. "gopkg.in/yaml%2ev3" for "gopkg.in/yaml.v3").
+// Paths without escapes, or with sequences that are not valid escapes, are returned unchanged.
+func unescapeImportPath(p string) string {
+	if !strings.Contains(p, "%") {
+		return p
+	}
+	unescaped, err := url.PathUnescape(p)
+	if err != nil {
+		// not a linker-escaped path after all; leave it untouched
+		return p
+	}
+	return unescaped
+}
diff --git a/syft/pkg/cataloger/golang/symbols_test.go b/syft/pkg/cataloger/golang/symbols_test.go
new file mode 100644
index 000000000..361f008ec
--- /dev/null
+++ b/syft/pkg/cataloger/golang/symbols_test.go
@@ -0,0 +1,132 @@
+package golang
+
+import (
+	"os"
+	"runtime"
+	"runtime/debug"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func Test_moduleSymbols(t *testing.T) {
+	mainModule := &debug.Module{Path: "github.com/someorg/somecli"}
+	deps := []*debug.Module{
+		{Path: "github.com/foo/bar"},
+		{Path: "github.com/foo/bar/v2"},
+		{Path: "gopkg.in/yaml.v3"},
+		nil,
+	}
+
+	tests := []struct {
+		name     string
+		symbols  []binarySymbol
+		expected map[string][]string
+	}{
+		{
+			name:     "no symbols",
+			symbols:  nil,
+			expected: nil,
+		},
+		{
+			name: "attribute symbols by longest module path prefix",
+			symbols: []binarySymbol{
+				{packagePath: "github.com/foo/bar", name: "github.com/foo/bar.Parse"},
+				{packagePath: "github.com/foo/bar/internal/util", name: "github.com/foo/bar/internal/util.(*Helper).Do"},
+				{packagePath: "github.com/foo/bar/v2", name: "github.com/foo/bar/v2.Parse"},
+			},
+			expected: map[string][]string{
+				"github.com/foo/bar": {
+					"github.com/foo/bar.Parse",
+					"github.com/foo/bar/internal/util.(*Helper).Do",
+				},
+				"github.com/foo/bar/v2": {
+					"github.com/foo/bar/v2.Parse",
+				},
+			},
+		},
+		{
+			name: "main package symbols are attributed to the main module",
+			symbols: []binarySymbol{
+				{packagePath: "main", name: "main.main"},
+				{packagePath: "github.com/someorg/somecli/cmd", name: "github.com/someorg/somecli/cmd.Execute"},
+			},
+			expected: map[string][]string{
+				"github.com/someorg/somecli": {
+					"github.com/someorg/somecli/cmd.Execute",
+					"main.main",
+				},
+			},
+		},
+		{
+			name: "stdlib and runtime symbols are not attributed",
+			symbols: []binarySymbol{
+				{packagePath: "runtime", name: "runtime.main"},
+				{packagePath: "net/http", name: "net/http.(*Client).Do"},
+			},
+			expected: map[string][]string{},
+		},
+		{
+			name: "duplicate symbols are deduplicated",
+			symbols: []binarySymbol{
+				{packagePath: "github.com/foo/bar", name: "github.com/foo/bar.Parse"},
+				{packagePath: "github.com/foo/bar", name: "github.com/foo/bar.Parse"},
+			},
+			expected: map[string][]string{
+				"github.com/foo/bar": {
+					"github.com/foo/bar.Parse",
+				},
+			},
+		},
+		{
+			name: "linker-escaped package paths are attributed to their module",
+			symbols: []binarySymbol{
+				{packagePath: "gopkg.in/yaml%2ev3", name: "gopkg.in/yaml%2ev3.Unmarshal"},
+			},
+			expected: map[string][]string{
+				"gopkg.in/yaml.v3": {
+					"gopkg.in/yaml%2ev3.Unmarshal",
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, moduleSymbols(tt.symbols, mainModule, deps))
+		})
+	}
+}
+
+func Test_getSymbols(t *testing.T) {
+	switch runtime.GOOS {
+	case "windows", "aix", "plan9", "js", "wasip1":
+		// only ELF and Mach-O executables are supported for symbol extraction (not PE, XCOFF, a.out, or wasm)
+		t.Skipf("%s binaries are not supported for symbol extraction", runtime.GOOS)
+	}
+
+	// the test executable is itself a go binary with a pclntab, which makes for a hermetic fixture
+	exe, err := os.Executable()
+	require.NoError(t, err)
+
+	f, err := os.Open(exe)
+	require.NoError(t, err)
+	defer f.Close()
+
+	symbols, err := getSymbols(f)
+	require.NoError(t, err)
+	require.NotEmpty(t, symbols)
+
+	var foundRuntime, foundTesting bool
+	for _, sym := range symbols {
+		switch {
+		case sym.packagePath == "runtime" && sym.name == "runtime.main":
+			foundRuntime = true
+		case sym.packagePath == "testing" && sym.name == "testing.tRunner":
+			foundTesting = true
+		}
+	}
+	assert.True(t, foundRuntime, "expected to find runtime.main symbol")
+	assert.True(t, foundTesting, "expected to find testing.tRunner symbol")
+}
diff --git a/syft/pkg/golang.go b/syft/pkg/golang.go
index 9656f83b1..eefe5e4ab 100644
--- a/syft/pkg/golang.go
+++ b/syft/pkg/golang.go
@@ -22,6 +22,11 @@ type GolangBinaryBuildinfoEntry struct {
 
 	// GoExperiments lists experimental Go features enabled during compilation (e.g., "arenas", "cgocheck2").
 	GoExperiments []string `json:"goExperiments,omitempty" cyclonedx:"goExperiments"`
+
+	// Symbols are the fully qualified function symbols from this module that are compiled into the binary
+	// (e.g., "github.com/foo/bar.(*Type).Method"), extracted from the binary symbol table (pclntab).
+	// Only captured when the golang cataloger is configured to capture symbols.
+	Symbols []string `json:"symbols,omitempty"`
 }
 
 // GolangModuleEntry represents all captured data for a Golang source scan with go.mod/go.sum