mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
wip: stdlib
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
a08457c166
commit
e4957568b6
@ -26,10 +26,11 @@ func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger {
|
||||
|
||||
// NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler.
|
||||
func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger {
|
||||
c := newGoBinaryCataloger(opts)
|
||||
return generic.NewCataloger(binaryCatalogerName).
|
||||
WithParserByMimeTypes(
|
||||
newGoBinaryCataloger(opts).parseGoBinary,
|
||||
c.parseGoBinary,
|
||||
mimetype.ExecutableMIMETypeSet.List()...,
|
||||
).
|
||||
WithResolvingProcessors(stdlibProcessor)
|
||||
WithResolvingProcessors(c.stdlibProcessor)
|
||||
}
|
||||
|
||||
@ -58,6 +58,7 @@ func Test_Config(t *testing.T) {
|
||||
NoProxy: []string{"my.private", "no.proxy"},
|
||||
MainModuleVersion: DefaultMainModuleVersionConfig(),
|
||||
UsePackagesLib: true,
|
||||
CaptureSymbols: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -86,6 +87,7 @@ func Test_Config(t *testing.T) {
|
||||
NoProxy: []string{"alt.no.proxy"},
|
||||
MainModuleVersion: DefaultMainModuleVersionConfig(),
|
||||
UsePackagesLib: true,
|
||||
CaptureSymbols: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@ import (
|
||||
"runtime/debug"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/mod/module"
|
||||
@ -50,6 +51,12 @@ type goBinaryCataloger struct {
|
||||
licenseResolver goLicenseResolver
|
||||
mainModuleVersion MainModuleVersionConfig
|
||||
captureSymbols bool
|
||||
|
||||
// stdlibSymbols holds the standard-library function symbols discovered per binary (keyed by the
|
||||
// binary's location), populated during parsing and consumed by stdlibProcessor when it builds the
|
||||
// synthetic "stdlib" package. Guarded by stdlibSymbolsMu because parsers run concurrently.
|
||||
stdlibSymbols map[file.Coordinates][]string
|
||||
stdlibSymbolsMu sync.Mutex
|
||||
}
|
||||
|
||||
func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
|
||||
@ -57,9 +64,30 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
|
||||
licenseResolver: newGoLicenseResolver(binaryCatalogerName, opts),
|
||||
mainModuleVersion: opts.MainModuleVersion,
|
||||
captureSymbols: opts.CaptureSymbols,
|
||||
stdlibSymbols: make(map[file.Coordinates][]string),
|
||||
}
|
||||
}
|
||||
|
||||
// recordStdlibSymbols merges the standard-library symbols discovered for a binary location so the
|
||||
// stdlib processor can attach them to the synthetic stdlib package.
|
||||
func (c *goBinaryCataloger) recordStdlibSymbols(coord file.Coordinates, symbols []string) {
|
||||
if len(symbols) == 0 {
|
||||
return
|
||||
}
|
||||
c.stdlibSymbolsMu.Lock()
|
||||
defer c.stdlibSymbolsMu.Unlock()
|
||||
merged := append(c.stdlibSymbols[coord], symbols...)
|
||||
slices.Sort(merged)
|
||||
c.stdlibSymbols[coord] = slices.Compact(merged)
|
||||
}
|
||||
|
||||
// stdlibSymbolsFor returns the standard-library symbols recorded for a binary location.
|
||||
func (c *goBinaryCataloger) stdlibSymbolsFor(coord file.Coordinates) []string {
|
||||
c.stdlibSymbolsMu.Lock()
|
||||
defer c.stdlibSymbolsMu.Unlock()
|
||||
return c.stdlibSymbols[coord]
|
||||
}
|
||||
|
||||
// parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
|
||||
func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
var pkgs []pkg.Package
|
||||
@ -130,7 +158,8 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
|
||||
mod.Main = createMainModuleFromPath(mod)
|
||||
}
|
||||
|
||||
symbolsByModule := moduleSymbols(mod.symbols, &mod.Main, mod.Deps)
|
||||
symbolsByModule, stdlibSymbols := moduleSymbols(mod.symbols, &mod.Main, mod.Deps)
|
||||
c.recordStdlibSymbols(location.Coordinates, stdlibSymbols)
|
||||
|
||||
var pkgs []pkg.Package
|
||||
for _, dep := range mod.Deps {
|
||||
|
||||
@ -12,12 +12,12 @@ import (
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
compilerPkgs, newRelationships := stdlibPackageAndRelationships(ctx, pkgs)
|
||||
func (c *goBinaryCataloger) stdlibProcessor(ctx context.Context, _ file.Resolver, pkgs []pkg.Package, relationships []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
compilerPkgs, newRelationships := c.stdlibPackageAndRelationships(ctx, pkgs)
|
||||
return append(pkgs, compilerPkgs...), append(relationships, newRelationships...), err
|
||||
}
|
||||
|
||||
func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) {
|
||||
func (c *goBinaryCataloger) stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]pkg.Package, []artifact.Relationship) {
|
||||
var goCompilerPkgs []pkg.Package
|
||||
var relationships []artifact.Relationship
|
||||
totalLocations := file.NewLocationSet()
|
||||
@ -33,7 +33,7 @@ func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]p
|
||||
continue
|
||||
}
|
||||
|
||||
stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations)
|
||||
stdLibPkg := newGoStdLib(ctx, mValue.GoCompiledVersion, goPkg.Locations, c.stdlibSymbolsFor(location.Coordinates))
|
||||
if stdLibPkg == nil {
|
||||
continue
|
||||
}
|
||||
@ -50,7 +50,7 @@ func stdlibPackageAndRelationships(ctx context.Context, pkgs []pkg.Package) ([]p
|
||||
return goCompilerPkgs, relationships
|
||||
}
|
||||
|
||||
func newGoStdLib(ctx context.Context, version string, location file.LocationSet) *pkg.Package {
|
||||
func newGoStdLib(ctx context.Context, version string, location file.LocationSet, symbols []string) *pkg.Package {
|
||||
stdlibCpe, err := generateStdlibCpe(version)
|
||||
if err != nil {
|
||||
return nil
|
||||
@ -66,6 +66,7 @@ func newGoStdLib(ctx context.Context, version string, location file.LocationSet)
|
||||
Type: pkg.GoModulePkg,
|
||||
Metadata: pkg.GolangBinaryBuildinfoEntry{
|
||||
GoCompiledVersion: version,
|
||||
Symbols: symbols,
|
||||
},
|
||||
}
|
||||
goCompilerPkg.SetID()
|
||||
|
||||
@ -88,7 +88,8 @@ func Test_stdlibPackageAndRelationships(t *testing.T) {
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, tt.pkgs)
|
||||
c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)}
|
||||
gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, tt.pkgs)
|
||||
assert.Len(t, gotPkgs, tt.wantPkgs)
|
||||
assert.Len(t, gotRels, tt.wantRels)
|
||||
})
|
||||
@ -137,7 +138,8 @@ func Test_stdlibPackageAndRelationships_values(t *testing.T) {
|
||||
Type: artifact.DependencyOfRelationship,
|
||||
}
|
||||
|
||||
gotPkgs, gotRels := stdlibPackageAndRelationships(ctx, []pkg.Package{p})
|
||||
c := &goBinaryCataloger{stdlibSymbols: make(map[file.Coordinates][]string)}
|
||||
gotPkgs, gotRels := c.stdlibPackageAndRelationships(ctx, []pkg.Package{p})
|
||||
require.Len(t, gotPkgs, 1)
|
||||
|
||||
gotPkg := gotPkgs[0]
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
package golang
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"debug/elf"
|
||||
"debug/gosym"
|
||||
"debug/macho"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"runtime/debug"
|
||||
@ -40,19 +42,113 @@ func getSymbols(r io.ReaderAt) (syms []binarySymbol, err error) {
|
||||
return nil, fmt.Errorf("unable to parse pclntab: %w", err)
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
for _, fn := range table.Funcs {
|
||||
if fn.Sym == nil {
|
||||
continue
|
||||
}
|
||||
seen[fn.Name] = struct{}{}
|
||||
syms = append(syms, binarySymbol{
|
||||
packagePath: fn.PackageName(),
|
||||
name: fn.Name,
|
||||
})
|
||||
}
|
||||
|
||||
// debug/gosym only exposes top-level functions; functions that the compiler inlined into their
|
||||
// callers are absent from table.Funcs even though their names are recorded in the pclntab funcname
|
||||
// table (used to reconstruct inlined frames in tracebacks). Recover those names so that a
|
||||
// vulnerable-but-inlined function (e.g. a small stdlib wrapper) is still reported as present.
|
||||
for _, name := range funcNameTable(pclntab) {
|
||||
if _, ok := seen[name]; ok {
|
||||
continue
|
||||
}
|
||||
pkgPath := packagePathFromSymbolName(name)
|
||||
if pkgPath == "" {
|
||||
continue
|
||||
}
|
||||
seen[name] = struct{}{}
|
||||
syms = append(syms, binarySymbol{packagePath: pkgPath, name: name})
|
||||
}
|
||||
|
||||
return syms, nil
|
||||
}
|
||||
|
||||
// packagePathFromSymbolName derives the owning package import path from a fully qualified symbol name.
// The package path is everything up to the first "." that follows the final "/" — e.g.
// "path/filepath.IsLocal" -> "path/filepath" and "golang.org/x/net/html.(*Tokenizer).Next" ->
// "golang.org/x/net/html".
//
// Two kinds of names need special care (mirroring what debug/gosym does for Sym.PackageName):
//   - generic instantiations such as "slices.SortFunc[go.shape.[]github.com/foo/bar.T]": the type
//     arguments inside "[...]" carry their own import paths, so they are dropped before looking for
//     the final "/" — otherwise the result would be a path cut out of the middle of the brackets.
//   - compiler-generated symbols prefixed with "go:" or "type:" (go1.20+) belong to no package.
//
// Returns "" when the name has no package-qualifying dot or is a compiler-generated symbol.
func packagePathFromSymbolName(name string) string {
	// a ":" can never appear in an import path, so these prefixes are unambiguous; the pre-1.20
	// "go." / "type." forms are deliberately NOT rejected since real modules (e.g. go.uber.org/zap)
	// start that way
	if strings.HasPrefix(name, "go:") || strings.HasPrefix(name, "type:") {
		return ""
	}

	// strip the generic instantiation (everything from the first "[") before locating separators
	if inst := strings.IndexByte(name, '['); inst >= 0 {
		name = name[:inst]
	}

	// slash is -1 when there is no "/", which makes slash+1 the start of the name
	slash := strings.LastIndexByte(name, '/')
	dot := strings.IndexByte(name[slash+1:], '.')
	if dot < 0 {
		return ""
	}
	return name[:slash+1+dot]
}
|
||||
|
||||
// funcNameTable returns every function name recorded in the pclntab's funcname table, including the
// names of inlined functions that debug/gosym does not expose. It parses the pclntab header for the
// Go 1.16+ layouts in either byte order (the header is written in the target's native endianness, so
// big-endian targets need the magic and offsets decoded as big-endian). On any unrecognized layout or
// out-of-bounds offset it returns nil (fail-soft), so callers fall back to the debug/gosym function
// set. See the runtime's pcHeader / moduledata layout.
//
// Header layout: magic (4 bytes), pad (2 bytes), minLC (1 byte), ptrSize (1 byte), followed by
// pointer-sized words: nfunc, nfiles, [textStart (1.18+ only),] funcnameOffset, cuOffset, ...
// The funcname table is the NUL-separated byte range [funcnameOffset, cuOffset).
func funcNameTable(pclntab []byte) []string {
	// size of the fixed prefix that precedes the pointer-sized header words
	const prefixSize = 8
	if len(pclntab) < prefixSize {
		return nil
	}

	// identify both the layout and the byte order from the magic: a little-endian magic read as
	// big-endian (or vice versa) never matches a known value, so the first match is unambiguous
	var (
		order        binary.ByteOrder
		hasTextStart bool // textStart precedes funcnameOffset in the 1.18+ headers but not 1.16/1.17
	)
	for _, candidate := range []binary.ByteOrder{binary.LittleEndian, binary.BigEndian} {
		switch candidate.Uint32(pclntab[0:4]) {
		case 0xfffffff1, 0xfffffff0: // go1.20+, go1.18/1.19
			order, hasTextStart = candidate, true
		case 0xfffffffa: // go1.16/1.17
			order, hasTextStart = candidate, false
		}
		if order != nil {
			break
		}
	}
	if order == nil {
		return nil
	}

	ptrSize := int(pclntab[7])
	if ptrSize != 4 && ptrSize != 8 {
		return nil
	}

	// readWord decodes the idx-th pointer-sized header word, reporting false when it lies outside
	// the table
	readWord := func(idx int) (uint64, bool) {
		off := prefixSize + idx*ptrSize
		if off+ptrSize > len(pclntab) {
			return 0, false
		}
		if ptrSize == 8 {
			return order.Uint64(pclntab[off : off+8]), true
		}
		return uint64(order.Uint32(pclntab[off : off+4])), true
	}

	// header words after (nfunc, nfiles): [textStart,] funcnameOffset, cuOffset, ...
	funcnameIdx := 2
	if hasTextStart {
		funcnameIdx = 3
	}
	funcnameOffset, ok1 := readWord(funcnameIdx)
	cuOffset, ok2 := readWord(funcnameIdx + 1)
	if !ok1 || !ok2 {
		return nil
	}

	// validate as uint64 before narrowing so that a corrupt 64-bit offset cannot wrap or truncate
	// into an in-range int
	if cuOffset > uint64(len(pclntab)) || funcnameOffset >= cuOffset {
		return nil
	}
	start, end := int(funcnameOffset), int(cuOffset)

	var names []string
	for _, raw := range bytes.Split(pclntab[start:end], []byte{0}) {
		if len(raw) == 0 {
			// padding or the empty tail after the final terminator
			continue
		}
		names = append(names, string(raw))
	}
	return names
}
|
||||
|
||||
// readPclntab locates the pclntab and the start address of the text segment within the binary.
|
||||
func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
|
||||
ident := make([]byte, 16)
|
||||
@ -106,11 +202,13 @@ func readPclntab(r io.ReaderAt) (pclntab []byte, textStart uint64, err error) {
|
||||
|
||||
// moduleSymbols attributes each extracted symbol to the module that owns it (by longest module path prefix
|
||||
// of the symbol's package path) and returns a sorted, deduplicated list of symbol names per module path.
|
||||
// Symbols from the "main" package are attributed to the main module. Stdlib and runtime symbols are not
|
||||
// attributed to any module.
|
||||
func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) map[string][]string {
|
||||
// Symbols from the "main" package are attributed to the main module. Standard-library symbols (which belong
|
||||
// to no module) are collected separately and returned as the second value so they can be attached to the
|
||||
// synthetic "stdlib" package. Compiler/runtime-internal symbols that are neither module-owned nor a
|
||||
// recognizable stdlib import path are dropped.
|
||||
func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Module) (byModule map[string][]string, stdlib []string) {
|
||||
if len(symbols) == 0 {
|
||||
return nil
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var modulePaths []string
|
||||
@ -138,6 +236,9 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod
|
||||
}
|
||||
}
|
||||
if best == "" {
|
||||
if pkgPath != "main" && isStandardImportPath(pkgPath) {
|
||||
stdlib = append(stdlib, sym.name)
|
||||
}
|
||||
continue
|
||||
}
|
||||
results[best] = append(results[best], sym.name)
|
||||
@ -147,6 +248,22 @@ func moduleSymbols(symbols []binarySymbol, main *debug.Module, deps []*debug.Mod
|
||||
slices.Sort(names)
|
||||
results[modPath] = slices.Compact(names)
|
||||
}
|
||||
if len(stdlib) > 0 {
|
||||
slices.Sort(stdlib)
|
||||
stdlib = slices.Compact(stdlib)
|
||||
}
|
||||
|
||||
return results
|
||||
return results, stdlib
|
||||
}
|
||||
|
||||
// isStandardImportPath reports whether path looks like a Go standard-library import path: its
// leading element (the text before the first "/", or the whole path when there is none) must be
// non-empty and contain no ".". So "net/http", "runtime" and "internal/abi" qualify, while
// "github.com/foo/bar" — whose leading element is a domain name — does not.
func isStandardImportPath(path string) bool {
	head, _, _ := strings.Cut(path, "/")
	if head == "" {
		return false
	}
	return !strings.Contains(head, ".")
}
|
||||
|
||||
@ -19,9 +19,10 @@ func Test_moduleSymbols(t *testing.T) {
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
symbols []binarySymbol
|
||||
expected map[string][]string
|
||||
name string
|
||||
symbols []binarySymbol
|
||||
expected map[string][]string
|
||||
expectedStdlib []string
|
||||
}{
|
||||
{
|
||||
name: "no symbols",
|
||||
@ -59,12 +60,18 @@ func Test_moduleSymbols(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "stdlib and runtime symbols are not attributed",
|
||||
name: "stdlib and runtime symbols are collected separately",
|
||||
symbols: []binarySymbol{
|
||||
{packagePath: "runtime", name: "runtime.main"},
|
||||
{packagePath: "net/http", name: "net/http.(*Client).Do"},
|
||||
{packagePath: "internal/abi", name: "internal/abi.(*Type).Kind"},
|
||||
},
|
||||
expected: map[string][]string{},
|
||||
expectedStdlib: []string{
|
||||
"internal/abi.(*Type).Kind",
|
||||
"net/http.(*Client).Do",
|
||||
"runtime.main",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "duplicate symbols are deduplicated",
|
||||
@ -82,7 +89,9 @@ func Test_moduleSymbols(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.expected, moduleSymbols(tt.symbols, mainModule, deps))
|
||||
gotByModule, gotStdlib := moduleSymbols(tt.symbols, mainModule, deps)
|
||||
assert.Equal(t, tt.expected, gotByModule)
|
||||
assert.Equal(t, tt.expectedStdlib, gotStdlib)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user