diff --git a/syft/pkg/cataloger/golang/cataloger.go b/syft/pkg/cataloger/golang/cataloger.go index d52c210c0..f51dc8955 100644 --- a/syft/pkg/cataloger/golang/cataloger.go +++ b/syft/pkg/cataloger/golang/cataloger.go @@ -26,10 +26,11 @@ func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger { // NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler. func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger { + c := newGoBinaryCataloger(opts) return generic.NewCataloger(binaryCatalogerName). WithParserByMimeTypes( - newGoBinaryCataloger(opts).parseGoBinary, + c.parseGoBinary, mimetype.ExecutableMIMETypeSet.List()..., ). - WithResolvingProcessors(stdlibProcessor) + WithResolvingProcessors(c.stdlibProcessor) } diff --git a/syft/pkg/cataloger/golang/config_test.go b/syft/pkg/cataloger/golang/config_test.go index 4b563e656..9d76e9d3b 100644 --- a/syft/pkg/cataloger/golang/config_test.go +++ b/syft/pkg/cataloger/golang/config_test.go @@ -58,6 +58,7 @@ func Test_Config(t *testing.T) { NoProxy: []string{"my.private", "no.proxy"}, MainModuleVersion: DefaultMainModuleVersionConfig(), UsePackagesLib: true, + CaptureSymbols: true, }, }, { @@ -86,6 +87,7 @@ func Test_Config(t *testing.T) { NoProxy: []string{"alt.no.proxy"}, MainModuleVersion: DefaultMainModuleVersionConfig(), UsePackagesLib: true, + CaptureSymbols: true, }, }, } diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go index c1ade2e35..8312d64de 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -13,6 +13,7 @@ import ( "runtime/debug" "slices" "strings" + "sync" "time" "golang.org/x/mod/module" @@ -50,6 +51,12 @@ type goBinaryCataloger struct { licenseResolver goLicenseResolver mainModuleVersion MainModuleVersionConfig captureSymbols bool + + // stdlibSymbols holds the standard-library function symbols discovered per binary 
(keyed by the + // binary's location), populated during parsing and consumed by stdlibProcessor when it builds the + // synthetic "stdlib" package. Guarded by stdlibSymbolsMu because parsers run concurrently. + stdlibSymbols map[file.Coordinates][]string + stdlibSymbolsMu sync.Mutex } func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { @@ -57,9 +64,30 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger { licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts), mainModuleVersion: opts.MainModuleVersion, captureSymbols: opts.CaptureSymbols, + stdlibSymbols: make(map[file.Coordinates][]string), } } +// recordStdlibSymbols merges the standard-library symbols discovered for a binary location so the +// stdlib processor can attach them to the synthetic stdlib package. +func (c *goBinaryCataloger) recordStdlibSymbols(coord file.Coordinates, symbols []string) { + if len(symbols) == 0 { + return + } + c.stdlibSymbolsMu.Lock() + defer c.stdlibSymbolsMu.Unlock() + merged := append(c.stdlibSymbols[coord], symbols...) + slices.Sort(merged) + c.stdlibSymbols[coord] = slices.Compact(merged) +} + +// stdlibSymbolsFor returns the standard-library symbols recorded for a binary location. +func (c *goBinaryCataloger) stdlibSymbolsFor(coord file.Coordinates) []string { + c.stdlibSymbolsMu.Lock() + defer c.stdlibSymbolsMu.Unlock() + return c.stdlibSymbols[coord] +} + // parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler. 
func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var pkgs []pkg.Package @@ -130,7 +158,8 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re mod.Main = createMainModuleFromPath(mod) } - symbolsByModule := moduleSymbols(mod.symbols, &mod.Main, mod.Deps) + symbolsByModule, stdlibSymbols := moduleSymbols(mod.symbols, &mod.Main, mod.Deps) + c.recordStdlibSymbols(location.Coordinates, stdlibSymbols) var pkgs []pkg.Package for _, dep := range mod.Deps { diff --git a/syft/pkg/cataloger/golang/stdlib_package.go b/syft/pkg/cataloger/golang/stdlib_package.go index b1c7ea67a..9205a781e 100644 --- a/syft/pkg/cataloger/golang/stdlib_package.go +++ b/syft/pkg/cataloger/golang/stdlib_package.go @@ -12,12 +12,12 @@ import ( "github.com/anchore/syft/syft/pkg" ) -func stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { - compilerPkgs, newRelationships := stdlibPackageAndRelationships(ctx, pkgs) +func (c *goBinaryCataloger) stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { + compilerPkgs, newRelationships := c.stdlibPackageAndRelationships(ctx, pkgs) return append(pkgs, compilerPkgs...), append(relationships, newRelationships...), err } -func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) { +func (c *goBinaryCataloger) stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) { var goCompilerPkgs []pkg.Package var relationships []artifact.Relationship totalLocations := file.NewLocationSet() @@ -33,7 +33,7 @@ func stdlibPackageAndRelationships(ctx 
context.Context, pkgs []pkg.Package) ([]p continue } - stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations) + stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations, c.stdlibSymbolsFor(location.Coordinates)) if stdLibPkg == nil { continue } @@ -50,7 +50,7 @@ func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]p return goCompilerPkgs, relationships } -func newGoStdLib(ctx context.Context, version string, location file.LocationSet) *pkg.Package { +func newGoStdLib(ctx context.Context, version string, location file.LocationSet, symbols []string) *pkg.Package { stdlibCpe, err := generateStdlibCpe(version) if err != nil { return nil @@ -66,6 +66,7 @@ func newGoStdLib(ctx context.Context, version string, location file.LocationSet) Type: pkg.GoModulePkg, Metadata: pkg.GolangBinaryBuildinfoEntry{ GoCompiledVersion: version, + Symbols: symbols, }, } goCompilerPkg.SetID() diff --git a/syft/pkg/cataloger/golang/stdlib_package_test.go b/syft/pkg/cataloger/golang/stdlib_package_test.go index 395f626cd..3c4e8ff75 100644 --- a/syft/pkg/cataloger/golang/stdlib_package_test.go +++ b/syft/pkg/cataloger/golang/stdlib_package_test.go @@ -88,7 +88,8 @@ func Test_stdlibPackageAndRelationships(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, tt.pkgs) + c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)} + gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, tt.pkgs) assert.Len(t, gotPkgs, tt.wantPkgs) assert.Len(t, gotRels, tt.wantRels) }) @@ -137,7 +138,8 @@ func Test_stdlibPackageAndRelationships_values(t *testing.T) { Type: artifact.DependencyOfRelationship, } - gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, []pkg.Package{p}) + c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)} + gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, []pkg.Package{p}) require.Len(t, 
gotPkgs, 1) gotPkg := gotPkgs[0] diff --git a/syft/pkg/cataloger/golang/symbols.go b/syft/pkg/cataloger/golang/symbols.go index 39353151c..1e2ef3f0a 100644 --- a/syft/pkg/cataloger/golang/symbols.go +++ b/syft/pkg/cataloger/golang/symbols.go @@ -1,9 +1,11 @@ package golang import ( + "bytes" "debug/elf" "debug/gosym" "debug/macho" + "encoding/binary" "fmt" "io" "runtime/debug" @@ -40,19 +42,113 @@ func getSymbols(r io.ReaderAt) (syms []binarySymbol, err error) { return nil, fmt.Errorf("unable to parse pclntab: %w", err) } + seen := make(map[string]struct{}) for _, fn := range table.Funcs { if fn.Sym == nil { continue } + seen[fn.Name] = struct{}{} syms = append(syms, binarySymbol{ packagePath: fn.PackageName(), name: fn.Name, }) } + // debug/gosym only exposes top-level functions; functions that the compiler inlined into their + // callers are absent from table.Funcs even though their names are recorded in the pclntab funcname + // table (used to reconstruct inlined frames in tracebacks). Recover those names so that a + // vulnerable-but-inlined function (e.g. a small stdlib wrapper) is still reported as present. + for _, name := range funcNameTable(pclntab) { + if _, ok := seen[name]; ok { + continue + } + pkgPath := packagePathFromSymbolName(name) + if pkgPath == "" { + continue + } + seen[name] = struct{}{} + syms = append(syms, binarySymbol{packagePath: pkgPath, name: name}) + } + return syms, nil } +// packagePathFromSymbolName derives the owning package import path from a fully qualified symbol name. +// The package path is everything up to the first "." that follows the final "/" — e.g. +// "path/filepath.IsLocal" -> "path/filepath" and "golang.org/x/net/html.(*Tokenizer).Next" -> +// "golang.org/x/net/html". Returns "" when the name has no package-qualifying dot. 
// packagePathFromSymbolName derives the owning package import path from a fully qualified symbol name.
// The package path is everything up to the first "." that follows the final "/", e.g.
// "path/filepath.IsLocal" -> "path/filepath" and "golang.org/x/net/html.(*Tokenizer).Next" ->
// "golang.org/x/net/html".
//
// Two cases are handled so the result agrees with debug/gosym's Sym.PackageName:
//   - generic type arguments ("pkg.F[go.shape.int]") are dropped before the path is located, because
//     the arguments may themselves contain "/" and "." characters;
//   - compiler-reserved pseudo-symbols ("go:..." and "type:...") belong to no package.
//
// Returns "" when the name has no package-qualifying dot or belongs to no package.
func packagePathFromSymbolName(name string) string {
	if strings.HasPrefix(name, "go:") || strings.HasPrefix(name, "type:") {
		return ""
	}

	// a package path can never contain "[", so anything from the first "[" on is type arguments
	if i := strings.IndexByte(name, '['); i >= 0 {
		name = name[:i]
	}

	// when there is no "/", LastIndex yields -1 and the search below starts at the beginning
	slash := strings.LastIndex(name, "/")
	dot := strings.IndexByte(name[slash+1:], '.')
	if dot < 0 {
		return ""
	}
	return name[:slash+1+dot]
}

// funcnameWordIndex maps a pclntab magic number to the index (counted in pointer-sized words after
// the 8-byte header prefix) of the funcnameOffset field. ok is false for unrecognized magics.
func funcnameWordIndex(magic uint32) (idx int, ok bool) {
	switch magic {
	case 0xfffffff1, 0xfffffff0: // go1.20+, go1.18/1.19: nfunc, nfiles, textStart, funcnameOffset, ...
		return 3, true
	case 0xfffffffa: // go1.16/1.17: nfunc, nfiles, funcnameOffset, ...
		return 2, true
	}
	return 0, false
}

// funcNameTable returns every function name recorded in the pclntab's funcname table, including the
// names of inlined functions that debug/gosym does not expose. It parses the pclntab header for the
// Go 1.16+ layouts in either byte order. On any unrecognized layout or out-of-bounds offset it
// returns nil (fail-soft), so callers fall back to the debug/gosym function set.
func funcNameTable(pclntab []byte) []string {
	const prefixLen = 8 // magic (4 bytes) + pad1, pad2, minLC, ptrSize (1 byte each)
	if len(pclntab) < prefixLen {
		return nil
	}

	// The header is written in the target's byte order; try little-endian first, then big-endian.
	var order binary.ByteOrder = binary.LittleEndian
	funcnameIdx, ok := funcnameWordIndex(order.Uint32(pclntab[:4]))
	if !ok {
		order = binary.BigEndian
		if funcnameIdx, ok = funcnameWordIndex(order.Uint32(pclntab[:4])); !ok {
			return nil
		}
	}

	ptrSize := int(pclntab[7])
	if ptrSize != 4 && ptrSize != 8 {
		return nil
	}

	// readWord returns the idx-th pointer-sized header word, or false when it lies outside pclntab.
	readWord := func(idx int) (uint64, bool) {
		off := prefixLen + idx*ptrSize
		if off+ptrSize > len(pclntab) {
			return 0, false
		}
		if ptrSize == 8 {
			return order.Uint64(pclntab[off:]), true
		}
		return uint64(order.Uint32(pclntab[off:])), true
	}

	// the funcname table runs from funcnameOffset up to the start of the next table (cuOffset)
	start, okStart := readWord(funcnameIdx)
	end, okEnd := readWord(funcnameIdx + 1)
	if !okStart || !okEnd {
		return nil
	}
	// compare as uint64 so an oversized offset cannot wrap around when narrowed to int
	if end > uint64(len(pclntab)) || start >= end {
		return nil
	}

	// names are NUL-terminated; empty pieces come from terminators and padding
	var names []string
	for _, raw := range bytes.Split(pclntab[start:end], []byte{0}) {
		if len(raw) == 0 {
			continue
		}
		names = append(names, string(raw))
	}
	return names
}
+func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) (byModule map[string][]string, stdlib []string) { if len(symbols) == 0 { - return nil + return nil, nil } var modulePaths []string @@ -138,6 +236,9 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod } } if best == "" { + if pkgPath != "main" && isStandardImportPath(pkgPath) { + stdlib = append(stdlib, sym.name) + } continue } results[best] = append(results[best], sym.name) @@ -147,6 +248,22 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod slices.Sort(names) results[modPath] = slices.Compact(names) } + if len(stdlib) > 0 { + slices.Sort(stdlib) + stdlib = slices.Compact(stdlib) + } - return results + return results, stdlib +} + +// isStandardImportPath reports whether path is a Go standard-library import path. This mirrors the rule +// the Go toolchain uses: a path is standard if the element before its first slash contains no dot (e.g. +// "net/http", "runtime", "internal/abi"), which distinguishes it from module paths like +// "github.com/foo/bar" whose leading element is a domain name. 
// isStandardImportPath reports whether path is a Go standard-library import path. It follows the rule
// the Go toolchain uses: a path is standard if the element before its first slash contains no dot
// (e.g. "net/http", "runtime", "internal/abi"), which distinguishes it from module paths such as
// "github.com/foo/bar" whose leading element is a domain name.
//
// Because the input is derived from symbol names rather than from validated import paths, two extra
// cases are rejected: an empty leading element, and a leading element containing ":" (the
// compiler-reserved "go:" / "type:" pseudo-packages, which are not import paths at all).
func isStandardImportPath(path string) bool {
	first, _, _ := strings.Cut(path, "/")
	if first == "" {
		return false
	}
	return !strings.ContainsAny(first, ".:")
}