diff --git a/cmd/syft/internal/options/golang.go b/cmd/syft/internal/options/golang.go
index 60b6a0a48..688ec7b08 100644
--- a/cmd/syft/internal/options/golang.go
+++ b/cmd/syft/internal/options/golang.go
@@ -1,6 +1,7 @@
 package options
 
 import (
+	"fmt"
 	"strings"
 
 	"github.com/anchore/clio"
@@ -17,11 +18,12 @@ type golangConfig struct {
 	NoProxy                     string                        `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"`
 	MainModuleVersion           golangMainModuleVersionConfig `json:"main-module-version" yaml:"main-module-version" mapstructure:"main-module-version"`
 	UsePackagesLib              *bool                         `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"`
-	CaptureSymbols              bool                          `json:"capture-symbols" yaml:"capture-symbols" mapstructure:"capture-symbols"`
+	CaptureSymbols              golang.SymbolScope            `json:"capture-symbols" yaml:"capture-symbols" mapstructure:"capture-symbols"`
 }
 
 var _ interface {
 	clio.FieldDescriber
+	clio.PostLoader
 } = (*golangConfig)(nil)
 
 func (o *golangConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
@@ -40,13 +42,23 @@ if unset this defaults to $GONOPROXY`)
 always show (devel) as the version. Use these options to control heuristics to guess
 a more accurate version from the binary.`)
 	descriptions.Add(&o.UsePackagesLib, `use the golang.org/x/tools/go/packages library, which executes golang tooling found on the path in addition to potential network access to get the most accurate results`)
-	descriptions.Add(&o.CaptureSymbols, `capture function symbols from the binary symbol table (pclntab) and attribute them to the owning module`)
+	descriptions.Add(&o.CaptureSymbols, `capture function symbols from the binary symbol table (pclntab). valid values are:
+"none" (disabled), "stdlib" (only the synthetic stdlib package), and "all" (all module packages plus stdlib)`)
 	descriptions.Add(&o.MainModuleVersion.FromLDFlags, `look for LD flags that appear to be setting a version (e.g. -X main.version=1.0.0)`)
 	descriptions.Add(&o.MainModuleVersion.FromBuildSettings, `use the build settings (e.g. vcs.version & vcs.time) to craft a v0 pseudo version
 (e.g. v0.0.0-20220308212642-53e6d0aaf6fb) when a more accurate version cannot be found otherwise`)
 	descriptions.Add(&o.MainModuleVersion.FromContents, `search for semver-like strings in the binary contents`)
 }
 
+func (o *golangConfig) PostLoad() error {
+	parsed := o.CaptureSymbols.Parse()
+	if parsed == "" {
+		return fmt.Errorf("invalid value %q for golang.capture-symbols; valid values are: none, stdlib, all", o.CaptureSymbols)
+	}
+	o.CaptureSymbols = parsed
+	return nil
+}
+
 type golangMainModuleVersionConfig struct {
 	FromLDFlags       bool `json:"from-ld-flags" yaml:"from-ld-flags" mapstructure:"from-ld-flags"`
 	FromContents      bool `json:"from-contents" yaml:"from-contents" mapstructure:"from-contents"`
diff --git a/cmd/syft/internal/options/golang_test.go b/cmd/syft/internal/options/golang_test.go
new file mode 100644
index 000000000..bb64f1da1
--- /dev/null
+++ b/cmd/syft/internal/options/golang_test.go
@@ -0,0 +1,57 @@
+package options
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/syft/syft/pkg/cataloger/golang"
+)
+
+func Test_golangConfig_PostLoad(t *testing.T) {
+	tests := []struct {
+		name     string
+		cfg      golangConfig
+		expected golang.SymbolScope
+		wantErr  assert.ErrorAssertionFunc
+	}{
+		{
+			name:     "normalize all",
+			cfg:      golangConfig{CaptureSymbols: "all"},
+			expected: golang.SymbolScopeAll,
+		},
+		{
+			name:     "normalize stdlib",
+			cfg:      golangConfig{CaptureSymbols: "stdlib"},
+			expected: golang.SymbolScopeStdlib,
+		},
+		{
+			name:     "empty defaults to none",
+			cfg:      golangConfig{CaptureSymbols: ""},
+			expected: golang.SymbolScopeNone,
+		},
+		{
+			name:    "error on invalid value",
+			cfg:     golangConfig{CaptureSymbols: "bogus"},
+			wantErr: assert.Error,
+		},
+		{
+			name:    "boolean spellings are not valid",
+			cfg:     golangConfig{CaptureSymbols: "true"},
+			wantErr: assert.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.wantErr == nil {
+				tt.wantErr = assert.NoError
+			}
+			err := tt.cfg.PostLoad()
+			tt.wantErr(t, err)
+			if err != nil {
+				return
+			}
+			assert.Equal(t, tt.expected, tt.cfg.CaptureSymbols)
+		})
+	}
+}
diff --git a/internal/capabilities/appconfig.yaml b/internal/capabilities/appconfig.yaml
index b42a8ee29..7c9c52b55 100644
--- a/internal/capabilities/appconfig.yaml
+++ b/internal/capabilities/appconfig.yaml
@@ -12,6 +12,8 @@ application: # AUTO-GENERATED - application-level config keys
     description: treat DLL claims or on-disk evidence for child packages as DLL claims or on-disk evidence for any parent package
   - key: dotnet.relax-dll-claims-when-bundling-detected
     description: show all packages from the deps.json if bundling tooling is present as a dependency (e.g. ILRepack)
+  - key: golang.capture-symbols
+    description: 'capture function symbols from the binary symbol table (pclntab). valid values are: "none" (disabled), "stdlib" (only the synthetic stdlib package), and "all" (all module packages plus stdlib)'
   - key: golang.local-mod-cache-dir
     description: specify an explicit go mod cache directory, if unset this defaults to $GOPATH/pkg/mod or $HOME/go/pkg/mod
   - key: golang.local-vendor-dir
diff --git a/syft/pkg/cataloger/golang/capabilities.yaml b/syft/pkg/cataloger/golang/capabilities.yaml
index b0e55805c..01e24b146 100644
--- a/syft/pkg/cataloger/golang/capabilities.yaml
+++ b/syft/pkg/cataloger/golang/capabilities.yaml
@@ -24,6 +24,9 @@ configs: # AUTO-GENERATED - config structs and their fields
       - key: NoProxy
         description: NoProxy is a list of glob patterns that match go module names that should not be fetched from the go proxy. When not set, syft will use the GOPRIVATE and GONOPROXY env vars.
         app_key: golang.no-proxy
+      - key: CaptureSymbols
+        description: CaptureSymbols controls extracting function symbols from the binary symbol table (pclntab). Valid values are "none" (disabled), "stdlib" (only the synthetic stdlib package), and "all" (all module packages plus stdlib).
+        app_key: golang.capture-symbols
 catalogers:
   - ecosystem: go # MANUAL
     name: go-module-binary-cataloger # AUTO-GENERATED
diff --git a/syft/pkg/cataloger/golang/config.go b/syft/pkg/cataloger/golang/config.go
index 04de1a49f..11d1f8bad 100644
--- a/syft/pkg/cataloger/golang/config.go
+++ b/syft/pkg/cataloger/golang/config.go
@@ -18,6 +18,34 @@ var (
 	directProxiesOnly = []string{directProxyOnly}
 )
 
+// SymbolScope controls which packages get function symbols (from the binary pclntab) attached to their metadata.
+type SymbolScope string
+
+const (
+	// SymbolScopeNone disables symbol capture entirely.
+	SymbolScopeNone SymbolScope = "none"
+
+	// SymbolScopeStdlib captures symbols only for the synthetic "stdlib" package, leaving module packages without symbols.
+	SymbolScopeStdlib SymbolScope = "stdlib"
+
+	// SymbolScopeAll captures symbols for all module packages as well as the synthetic "stdlib" package.
+	SymbolScopeAll SymbolScope = "all"
+)
+
+// Parse normalizes a SymbolScope, treating an empty (unset) value as SymbolScopeNone. It returns an empty
+// SymbolScope to signal an unrecognized value, which callers validate against.
+func (s SymbolScope) Parse() SymbolScope {
+	switch strings.ToLower(strings.TrimSpace(string(s))) {
+	case string(SymbolScopeAll):
+		return SymbolScopeAll
+	case string(SymbolScopeStdlib):
+		return SymbolScopeStdlib
+	case string(SymbolScopeNone), "":
+		return SymbolScopeNone
+	}
+	return ""
+}
+
 type CatalogerConfig struct {
 	// SearchLocalModCacheLicenses enables searching for go package licenses in the local GOPATH mod cache.
 	// app-config: golang.search-local-mod-cache-licenses
@@ -49,9 +77,10 @@ type CatalogerConfig struct {
 
 	MainModuleVersion MainModuleVersionConfig `yaml:"main-module-version" json:"main-module-version" mapstructure:"main-module-version"`
 
-	// CaptureSymbols enables extracting function symbols from the binary symbol table (pclntab) and attributing them to the owning module.
+	// CaptureSymbols controls extracting function symbols from the binary symbol table (pclntab). Valid values are
+	// "none" (disabled), "stdlib" (only the synthetic stdlib package), and "all" (all module packages plus stdlib).
 	// app-config: golang.capture-symbols
-	CaptureSymbols bool `yaml:"capture-symbols" json:"capture-symbols" mapstructure:"capture-symbols"`
+	CaptureSymbols SymbolScope `yaml:"capture-symbols" json:"capture-symbols" mapstructure:"capture-symbols"`
 
 	// Whether to use the golang.org/x/tools/go/packages, which executes golang tooling found on the path in addition to potential network access
 	UsePackagesLib bool `json:"use-packages-lib" yaml:"use-packages-lib" mapstructure:"use-packages-lib"`
@@ -80,7 +109,7 @@ func DefaultCatalogerConfig() CatalogerConfig {
 		UsePackagesLib:    true,
 		MainModuleVersion: DefaultMainModuleVersionConfig(),
 		LocalModCacheDir:  defaultGoModDir(),
-		CaptureSymbols:    false,
+		CaptureSymbols:    SymbolScopeNone,
 	}
 
 	// first process the proxy settings
@@ -189,7 +218,7 @@ func (g CatalogerConfig) WithMainModuleVersion(input MainModuleVersionConfig) Ca
 	return g
 }
 
-func (g CatalogerConfig) WithCaptureSymbols(input bool) CatalogerConfig {
+func (g CatalogerConfig) WithCaptureSymbols(input SymbolScope) CatalogerConfig {
 	g.CaptureSymbols = input
 	return g
 }
diff --git a/syft/pkg/cataloger/golang/config_test.go b/syft/pkg/cataloger/golang/config_test.go
index aa3e7a712..e20f9d47b 100644
--- a/syft/pkg/cataloger/golang/config_test.go
+++ b/syft/pkg/cataloger/golang/config_test.go
@@ -58,7 +58,7 @@ func Test_Config(t *testing.T) {
 				NoProxy:           []string{"my.private", "no.proxy"},
 				MainModuleVersion: DefaultMainModuleVersionConfig(),
 				UsePackagesLib:    true,
-				CaptureSymbols:    false,
+				CaptureSymbols:    SymbolScopeNone,
 			},
 		},
 		{
@@ -87,7 +87,7 @@ func Test_Config(t *testing.T) {
 				NoProxy:           []string{"alt.no.proxy"},
 				MainModuleVersion: DefaultMainModuleVersionConfig(),
 				UsePackagesLib:    true,
-				CaptureSymbols:    false,
+				CaptureSymbols:    SymbolScopeNone,
 			},
 		},
 	}
@@ -114,6 +114,29 @@ func Test_Config(t *testing.T) {
 	}
 }
 
+func Test_SymbolScope_Parse(t *testing.T) {
+	tests := []struct {
+		input    string
+		expected SymbolScope
+	}{
+		{"all", SymbolScopeAll},
+		{"ALL", SymbolScopeAll},
+		{" all ", SymbolScopeAll},
+		{"stdlib", SymbolScopeStdlib},
+		{"Stdlib", SymbolScopeStdlib},
+		{"none", SymbolScopeNone},
+		{"", SymbolScopeNone},
+		{"true", ""},
+		{"false", ""},
+		{"bogus", ""},
+	}
+	for _, test := range tests {
+		t.Run(test.input, func(t *testing.T) {
+			assert.Equal(t, test.expected, SymbolScope(test.input).Parse())
+		})
+	}
+}
+
 // restoreCache ensures cache settings are restored after test
 func restoreCache(t testing.TB) {
 	t.Helper()
diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go
index 80519caf1..2cefc6058 100644
--- a/syft/pkg/cataloger/golang/parse_go_binary.go
+++ b/syft/pkg/cataloger/golang/parse_go_binary.go
@@ -50,7 +50,7 @@ const devel = "(devel)"
 type goBinaryCataloger struct {
 	licenseResolver   goLicenseResolver
 	mainModuleVersion MainModuleVersionConfig
-	captureSymbols    bool
+	symbolScope       SymbolScope
 
 	// stdlibSymbols holds the standard-library function symbols discovered per binary (keyed by the
 	// binary's location), populated during parsing and consumed by stdlibProcessor when it builds the
@@ -63,7 +63,7 @@ func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
 	return &goBinaryCataloger{
 		licenseResolver:   newGoLicenseResolver(binaryCatalogerName, opts),
 		mainModuleVersion: opts.MainModuleVersion,
-		captureSymbols:    opts.CaptureSymbols,
+		symbolScope:       opts.CaptureSymbols.Parse(),
 		stdlibSymbols:     make(map[file.Coordinates][]string),
 	}
 }
@@ -98,7 +98,9 @@ func (c *goBinaryCataloger) parseGoBinary(ctx context.Context, resolver file.Res
 	}
 	defer internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
 
-	mods, errs := scanFile(reader.Location, unionReader, c.captureSymbols)
+	// enable symbol extraction only for the recognized capturing scopes: Parse returns "" for an unrecognized
+	// value, which must be treated as disabled rather than slipping through a "not none" comparison.
+	mods, errs := scanFile(reader.Location, unionReader, c.symbolScope == SymbolScopeStdlib || c.symbolScope == SymbolScopeAll)
 
 	var rels []artifact.Relationship
 	for _, mod := range mods {
@@ -161,6 +163,12 @@ func (c *goBinaryCataloger) buildGoPkgInfo(ctx context.Context, resolver file.Re
 	symbolsByModule, stdlibSymbols := moduleSymbols(mod.symbols, &mod.Main, mod.Deps)
 	c.recordStdlibSymbols(location.Coordinates, stdlibSymbols)
 
+	if c.symbolScope != SymbolScopeAll {
+		// only the "all" scope attaches per-module symbols; for the "stdlib" scope we keep just the
+		// recorded stdlib symbols. nil map lookups below then yield nil symbol lists for each module.
+		symbolsByModule = nil
+	}
+
 	var pkgs []pkg.Package
 	for _, dep := range mod.Deps {
 		if dep == nil {
diff --git a/syft/pkg/cataloger/golang/parse_go_binary_test.go b/syft/pkg/cataloger/golang/parse_go_binary_test.go
index d0fda31fe..14df4d4d5 100644
--- a/syft/pkg/cataloger/golang/parse_go_binary_test.go
+++ b/syft/pkg/cataloger/golang/parse_go_binary_test.go
@@ -1422,3 +1422,71 @@ type alwaysErrorReader struct{}
 func (alwaysErrorReader) Read(_ []byte) (int, error) {
 	return 0, errors.New("read from always error reader")
 }
+
+func Test_buildGoPkgInfo_symbolScope(t *testing.T) {
+	location := file.NewLocationFromCoordinates(file.Coordinates{RealPath: "/a-path", FileSystemID: "layer-id"})
+
+	// the symbols a binary would carry once scanFile has extracted them: one main-package symbol, one
+	// dependency symbol, and one standard-library symbol. For the "none" scope scanFile runs with symbol
+	// capture disabled, so the build info carries no symbols at all.
+	populatedSymbols := []binarySymbol{
+		{packagePath: "main", name: "main.main"},
+		{packagePath: "github.com/foo/bar", name: "github.com/foo/bar.Parse"},
+		{packagePath: "net/http", name: "net/http.(*Client).Do"},
+	}
+
+	tests := []struct {
+		name           string
+		scope          SymbolScope
+		symbols        []binarySymbol
+		wantMainSyms   []string
+		wantDepSyms    []string
+		wantStdlibSyms []string
+	}{
+		{
+			name:    "none captures nothing",
+			scope:   SymbolScopeNone,
+			symbols: nil,
+		},
+		{
+			name:           "stdlib captures only the stdlib package",
+			scope:          SymbolScopeStdlib,
+			symbols:        populatedSymbols,
+			wantStdlibSyms: []string{"net/http.(*Client).Do"},
+		},
+		{
+			name:           "all captures module and stdlib packages",
+			scope:          SymbolScopeAll,
+			symbols:        populatedSymbols,
+			wantMainSyms:   []string{"main.main"},
+			wantDepSyms:    []string{"github.com/foo/bar.Parse"},
+			wantStdlibSyms: []string{"net/http.(*Client).Do"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mod := &extendedBuildInfo{
+				BuildInfo: &debug.BuildInfo{
+					GoVersion: "go1.22.0",
+					Main:      debug.Module{Path: "github.com/anchore/syft", Version: "v1.0.0"},
+					Deps:      []*debug.Module{{Path: "github.com/foo/bar", Version: "v1.2.3"}},
+				},
+				arch:    "amd64",
+				symbols: tt.symbols,
+			}
+
+			c := newGoBinaryCataloger(CatalogerConfig{CaptureSymbols: tt.scope})
+			reader, err := unionreader.GetUnionReader(io.NopCloser(strings.NewReader("")))
+			require.NoError(t, err)
+
+			mainPkg, pkgs := c.buildGoPkgInfo(context.Background(), fileresolver.Empty{}, location, mod, mod.arch, reader)
+			require.NotNil(t, mainPkg)
+			require.Len(t, pkgs, 1)
+
+			assert.Equal(t, tt.wantMainSyms, mainPkg.Metadata.(pkg.GolangBinaryBuildinfoEntry).Symbols, "main module symbols")
+			assert.Equal(t, tt.wantDepSyms, pkgs[0].Metadata.(pkg.GolangBinaryBuildinfoEntry).Symbols, "dependency symbols")
+			assert.Equal(t, tt.wantStdlibSyms, c.stdlibSymbolsFor(location.Coordinates), "recorded stdlib symbols")
+		})
+	}
+}
diff --git a/syft/pkg/golang.go b/syft/pkg/golang.go
index eefe5e4ab..1a119e8dc 100644
--- a/syft/pkg/golang.go
+++ b/syft/pkg/golang.go
@@ -25,7 +25,9 @@ type GolangBinaryBuildinfoEntry struct {
 
 	// Symbols are the fully qualified function symbols from this module that are compiled into the binary
 	// (e.g., "github.com/foo/bar.(*Type).Method"), extracted from the binary symbol table (pclntab).
-	// Only captured when the golang cataloger is configured to capture symbols.
+	// Populated only when the golang cataloger's capture-symbols scope covers this package: the "all" scope
+	// populates every module package plus the synthetic stdlib package, while the "stdlib" scope populates
+	// only the stdlib package.
 	Symbols []string `json:"symbols,omitempty"`
 }
 