Guess go main module version based on binary contents (#2608)

* guess go main module version based on binary contents

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* add configuration options for golang main module version heuristics

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix test setup for go bin cataloger

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix unit test

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix incorrect test assert ordering

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* handle error from seek

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-02-09 14:52:42 -05:00 committed by GitHub
parent 737c4e44c5
commit 84576b93e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 309 additions and 94 deletions

View File

@ -691,6 +691,24 @@ golang:
# SYFT_GOLANG_NOPROXY env var
no-proxy: ""
# the go main module version discovered from binaries built with the go compiler will
# always show (devel) as the version. Use these options to control heuristics to guess
# a more accurate version from the binary.
main-module-version:
# look for LD flags that appear to be setting a version (e.g. -X main.version=1.0.0)
# SYFT_GOLANG_MAIN_MODULE_VERSION_FROM_LD_FLAGS env var
from-ld-flags: true
# use the build settings (e.g. vcs.revision & vcs.time) to craft a v0 pseudo version
# (e.g. v0.0.0-20220308212642-53e6d0aaf6fb) when a more accurate version cannot be found otherwise.
# SYFT_GOLANG_MAIN_MODULE_VERSION_FROM_BUILD_SETTINGS env var
from-build-settings: true
# search for semver-like strings in the binary contents.
# SYFT_GOLANG_MAIN_MODULE_VERSION_FROM_CONTENTS env var
from-contents: true
java:
maven-url: "https://repo1.maven.org/maven2"
max-parent-recursive-depth: 5

View File

@ -62,6 +62,7 @@ func DefaultCatalog() Catalog {
Scope: source.SquashedScope.String(),
Package: defaultPackageConfig(),
LinuxKernel: defaultLinuxKernelConfig(),
Golang: defaultGolangConfig(),
File: defaultFileConfig(),
Relationships: defaultRelationshipsConfig(),
Source: defaultSourceConfig(),
@ -131,7 +132,13 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
WithLocalModCacheDir(cfg.Golang.LocalModCacheDir).
WithSearchRemoteLicenses(cfg.Golang.SearchRemoteLicenses).
WithProxy(cfg.Golang.Proxy).
WithNoProxy(cfg.Golang.NoProxy),
WithNoProxy(cfg.Golang.NoProxy).
WithMainModuleVersion(
golang.DefaultMainModuleVersionConfig().
WithFromContents(cfg.Golang.MainModuleVersion.FromContents).
WithFromBuildSettings(cfg.Golang.MainModuleVersion.FromBuildSettings).
WithFromLDFlags(cfg.Golang.MainModuleVersion.FromLDFlags),
),
JavaScript: javascript.DefaultCatalogerConfig().
WithSearchRemoteLicenses(cfg.JavaScript.SearchRemoteLicenses).
WithNpmBaseURL(cfg.JavaScript.NpmBaseURL),

View File

@ -1,9 +1,38 @@
package options
import (
"strings"
"github.com/anchore/syft/syft/pkg/cataloger/golang"
)
type golangConfig struct {
SearchLocalModCacheLicenses bool `json:"search-local-mod-cache-licenses" yaml:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
LocalModCacheDir string `json:"local-mod-cache-dir" yaml:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"`
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
Proxy string `json:"proxy" yaml:"proxy" mapstructure:"proxy"`
NoProxy string `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"`
SearchLocalModCacheLicenses bool `json:"search-local-mod-cache-licenses" yaml:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
LocalModCacheDir string `json:"local-mod-cache-dir" yaml:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"`
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
Proxy string `json:"proxy" yaml:"proxy" mapstructure:"proxy"`
NoProxy string `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"`
MainModuleVersion golangMainModuleVersionConfig `json:"main-module-version" yaml:"main-module-version" mapstructure:"main-module-version"`
}
// golangMainModuleVersionConfig holds the application-level switches that control which
// heuristics may be used to guess the version of a go binary's main module.
type golangMainModuleVersionConfig struct {
// FromLDFlags enables looking for LD flags that appear to be setting a version.
FromLDFlags bool `json:"from-ld-flags" yaml:"from-ld-flags" mapstructure:"from-ld-flags"`
// FromContents enables searching for semver-like strings in the binary contents.
FromContents bool `json:"from-contents" yaml:"from-contents" mapstructure:"from-contents"`
// FromBuildSettings enables crafting a v0 pseudo version from the vcs build settings.
FromBuildSettings bool `json:"from-build-settings" yaml:"from-build-settings" mapstructure:"from-build-settings"`
}
// defaultGolangConfig returns the application-level golang options seeded from
// golang.DefaultCatalogerConfig(). The proxy and no-proxy lists are flattened into
// comma-separated strings, and the main module version switches are copied field by field.
func defaultGolangConfig() golangConfig {
	catalogerDefaults := golang.DefaultCatalogerConfig()
	versionDefaults := catalogerDefaults.MainModuleVersion

	var cfg golangConfig
	cfg.SearchLocalModCacheLicenses = catalogerDefaults.SearchLocalModCacheLicenses
	cfg.LocalModCacheDir = catalogerDefaults.LocalModCacheDir
	cfg.SearchRemoteLicenses = catalogerDefaults.SearchRemoteLicenses
	cfg.Proxy = strings.Join(catalogerDefaults.Proxies, ",")
	cfg.NoProxy = strings.Join(catalogerDefaults.NoProxy, ",")
	cfg.MainModuleVersion.FromLDFlags = versionDefaults.FromLDFlags
	cfg.MainModuleVersion.FromContents = versionDefaults.FromContents
	cfg.MainModuleVersion.FromBuildSettings = versionDefaults.FromBuildSettings
	return cfg
}

View File

@ -31,7 +31,9 @@ func Test_packageCatalogerExports(t *testing.T) {
for pkg, expected := range expectAtLeast {
actual, ok := exports[pkg]
require.True(t, ok, pkg)
require.True(t, expected.IsSubset(actual.Names()), pkg)
if !assert.True(t, actual.Names().IsSubset(expected), pkg) {
t.Logf("missing: %s", strset.SymmetricDifference(expected, actual.Names()))
}
}
}

View File

@ -38,12 +38,12 @@ func NewGoModuleFileCataloger(opts CatalogerConfig) pkg.Cataloger {
// NewGoModuleBinaryCataloger returns a new cataloger object that searches within binaries built by the go compiler.
func NewGoModuleBinaryCataloger(opts CatalogerConfig) pkg.Cataloger {
c := goBinaryCataloger{
licenses: newGoLicenses(binaryCatalogerName, opts),
}
return &progressingCataloger{
cataloger: generic.NewCataloger(binaryCatalogerName).
WithParserByMimeTypes(c.parseGoBinary, mimetype.ExecutableMIMETypeSet.List()...),
WithParserByMimeTypes(
newGoBinaryCataloger(opts).parseGoBinary,
mimetype.ExecutableMIMETypeSet.List()...,
),
}
}

View File

@ -20,11 +20,18 @@ var (
)
type CatalogerConfig struct {
SearchLocalModCacheLicenses bool `yaml:"search-local-mod-cache-licenses" json:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
LocalModCacheDir string `yaml:"local-mod-cache-dir" json:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"`
SearchRemoteLicenses bool `yaml:"search-remote-licenses" json:"search-remote-licenses" mapstructure:"search-remote-licenses"`
Proxies []string `yaml:"proxies,omitempty" json:"proxies,omitempty" mapstructure:"proxies"`
NoProxy []string `yaml:"no-proxy,omitempty" json:"no-proxy,omitempty" mapstructure:"no-proxy"`
SearchLocalModCacheLicenses bool `yaml:"search-local-mod-cache-licenses" json:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
LocalModCacheDir string `yaml:"local-mod-cache-dir" json:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"`
SearchRemoteLicenses bool `yaml:"search-remote-licenses" json:"search-remote-licenses" mapstructure:"search-remote-licenses"`
Proxies []string `yaml:"proxies,omitempty" json:"proxies,omitempty" mapstructure:"proxies"`
NoProxy []string `yaml:"no-proxy,omitempty" json:"no-proxy,omitempty" mapstructure:"no-proxy"`
MainModuleVersion MainModuleVersionConfig `yaml:"main-module-version" json:"main-module-version" mapstructure:"main-module-version"`
}
// MainModuleVersionConfig selects which heuristics the go binary cataloger may use to replace
// a "(devel)" main module version with a better guess.
type MainModuleVersionConfig struct {
// FromLDFlags allows a version extracted from the "-ldflags" build setting to be used.
FromLDFlags bool `yaml:"from-ld-flags" json:"from-ld-flags" mapstructure:"from-ld-flags"`
// FromContents allows a semver-like string found in the binary contents to be used.
FromContents bool `yaml:"from-contents" json:"from-contents" mapstructure:"from-contents"`
// FromBuildSettings allows a pseudo version built from the "vcs.revision" and "vcs.time"
// build settings to be used.
FromBuildSettings bool `yaml:"from-build-settings" json:"from-build-settings" mapstructure:"from-build-settings"`
}
// DefaultCatalogerConfig creates a CatalogerConfig with default options, which includes:
@ -32,7 +39,9 @@ type CatalogerConfig struct {
// - setting the default no proxy if none is provided
// - setting the default local module cache dir if none is provided
func DefaultCatalogerConfig() CatalogerConfig {
g := CatalogerConfig{}
g := CatalogerConfig{
MainModuleVersion: DefaultMainModuleVersionConfig(),
}
// first process the proxy settings
if len(g.Proxies) == 0 {
@ -76,6 +85,14 @@ func DefaultCatalogerConfig() CatalogerConfig {
return g
}
// DefaultMainModuleVersionConfig returns a MainModuleVersionConfig with every
// main module version heuristic enabled.
func DefaultMainModuleVersionConfig() MainModuleVersionConfig {
	var cfg MainModuleVersionConfig
	cfg.FromLDFlags = true
	cfg.FromContents = true
	cfg.FromBuildSettings = true
	return cfg
}
func (g CatalogerConfig) WithSearchLocalModCacheLicenses(input bool) CatalogerConfig {
g.SearchLocalModCacheLicenses = input
return g
@ -112,3 +129,23 @@ func (g CatalogerConfig) WithNoProxy(input string) CatalogerConfig {
g.NoProxy = strings.Split(input, ",")
return g
}
// WithMainModuleVersion returns a copy of the configuration whose main module
// version heuristics are replaced by input; the receiver is left untouched.
func (g CatalogerConfig) WithMainModuleVersion(input MainModuleVersionConfig) CatalogerConfig {
	updated := g
	updated.MainModuleVersion = input
	return updated
}
// WithFromLDFlags returns a copy of the configuration with the FromLDFlags
// switch set to input; the receiver is left untouched.
func (g MainModuleVersionConfig) WithFromLDFlags(input bool) MainModuleVersionConfig {
	updated := g
	updated.FromLDFlags = input
	return updated
}
// WithFromContents returns a copy of the configuration with the FromContents
// switch set to input; the receiver is left untouched.
func (g MainModuleVersionConfig) WithFromContents(input bool) MainModuleVersionConfig {
	updated := g
	updated.FromContents = input
	return updated
}
// WithFromBuildSettings returns a copy of the configuration with the
// FromBuildSettings switch set to input; the receiver is left untouched.
func (g MainModuleVersionConfig) WithFromBuildSettings(input bool) MainModuleVersionConfig {
	updated := g
	updated.FromBuildSettings = input
	return updated
}

View File

@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/assert"
)
func Test_Options(t *testing.T) {
func Test_Config(t *testing.T) {
type opts struct {
local bool
cacheDir string
@ -51,6 +51,7 @@ func Test_Options(t *testing.T) {
SearchRemoteLicenses: false,
Proxies: []string{"https://my.proxy"},
NoProxy: []string{"my.private", "no.proxy"},
MainModuleVersion: DefaultMainModuleVersionConfig(),
},
},
{
@ -74,6 +75,7 @@ func Test_Options(t *testing.T) {
SearchRemoteLicenses: true,
Proxies: []string{"https://alt.proxy", "direct"},
NoProxy: []string{"alt.no.proxy"},
MainModuleVersion: DefaultMainModuleVersionConfig(),
},
},
}

View File

@ -17,6 +17,7 @@ import (
"golang.org/x/mod/module"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader"
@ -37,7 +38,7 @@ var (
// inject the correct version into the main module of the build process
knownBuildFlagPatterns = []*regexp.Regexp{
regexp.MustCompile(`(?m)\.([gG]it)?([bB]uild)?[vV]ersion=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
regexp.MustCompile(`(?m)\.([gG]it)?([bB]uild)?[vV]er(sion)?=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
regexp.MustCompile(`(?m)\.([tT]ag)=(\S+/)*(?P<version>v?\d+.\d+.\d+[-\w]*)`),
}
)
@ -45,7 +46,15 @@ var (
const devel = "(devel)"
type goBinaryCataloger struct {
licenses goLicenses
licenses goLicenses
mainModuleVersion MainModuleVersionConfig
}
// newGoBinaryCataloger builds a goBinaryCataloger from opts: licenses comes from
// newGoLicenses (keyed by the binary cataloger name) and the main module version
// heuristics are copied from opts.MainModuleVersion.
func newGoBinaryCataloger(opts CatalogerConfig) *goBinaryCataloger {
	cataloger := new(goBinaryCataloger)
	cataloger.licenses = newGoLicenses(binaryCatalogerName, opts)
	cataloger.mainModuleVersion = opts.MainModuleVersion
	return cataloger
}
// parseGoBinary catalogs packages found in the "buildinfo" section of a binary built by the go compiler.
@ -61,13 +70,53 @@ func (c *goBinaryCataloger) parseGoBinary(_ context.Context, resolver file.Resol
internal.CloseAndLogError(reader.ReadCloser, reader.RealPath)
for _, mod := range mods {
pkgs = append(pkgs, c.buildGoPkgInfo(resolver, reader.Location, mod, mod.arch)...)
pkgs = append(pkgs, c.buildGoPkgInfo(resolver, reader.Location, mod, mod.arch, unionReader)...)
}
return pkgs, nil, nil
}
func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location) pkg.Package {
// buildGoPkgInfo converts the build info of a single go binary into packages.
//
// One package is produced for every non-nil entry of mod.Deps that passes pkg.IsValid,
// followed by a package for the main module when one is known. A nil mod yields a nil
// slice. When mod.Main is empty but mod.Path is set, mod.Main is filled in (mutating mod)
// via createMainModuleFromPath before any package is built. The reader is only handed on
// to makeGoMainPackage.
func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string, reader io.ReadSeekCloser) []pkg.Package {
	if mod == nil {
		return nil
	}

	noModule := debug.Module{}
	if mod.Main == noModule && mod.Path != "" {
		mod.Main = createMainModuleFromPath(mod.Path)
	}

	var found []pkg.Package
	for _, dep := range mod.Deps {
		if dep == nil {
			continue
		}

		candidate := c.newGoBinaryPackage(
			resolver,
			dep,
			mod.Main.Path,
			mod.GoVersion,
			arch,
			nil,
			mod.cryptoSettings,
			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
		)
		if !pkg.IsValid(&candidate) {
			continue
		}
		found = append(found, candidate)
	}

	// without a main module there is nothing more to report than the dependencies
	if mod.Main == noModule {
		return found
	}

	return append(found, c.makeGoMainPackage(resolver, mod, arch, location, reader))
}
func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *extendedBuildInfo, arch string, location file.Location, reader io.ReadSeekCloser) pkg.Package {
gbs := getBuildSettings(mod.Settings)
main := c.newGoBinaryPackage(
resolver,
@ -81,37 +130,17 @@ func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *exten
)
if main.Version != devel {
// found a full package with a non-development version... return it as is...
return main
}
version, hasVersion := gbs.Get("vcs.revision")
timestamp, hasTimestamp := gbs.Get("vcs.time")
var ldflags string
if metadata, ok := main.Metadata.(pkg.GolangBinaryBuildinfoEntry); ok {
// we've found a specific version from the ldflags! use it as the version.
// why not combine that with the pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456)?
// short answer: we're assuming that if a specific semver was provided in the ldflags that
// there is a matching vcs tag to match that could be referenced. This assumption could
// be incorrect in terms of the go.mod contents, but is not incorrect in terms of the logical
// version of the package.
ldflags, _ = metadata.BuildSettings.Get("-ldflags")
// we have a package, but the version is "devel"... let's try and find a better answer
var metadata *pkg.GolangBinaryBuildinfoEntry
if v, ok := main.Metadata.(pkg.GolangBinaryBuildinfoEntry); ok {
metadata = &v
}
version := c.findMainModuleVersion(metadata, gbs, reader)
majorVersion, fullVersion := extractVersionFromLDFlags(ldflags)
if fullVersion != "" {
version = fullVersion
} else if hasVersion && hasTimestamp {
//NOTE: err is ignored, because if parsing fails
// we still use the empty Time{} struct to generate an empty date, like 00010101000000
// for consistency with the pseudo-version format: https://go.dev/ref/mod#pseudo-versions
ts, _ := time.Parse(time.RFC3339, timestamp)
if len(version) >= 12 {
version = version[:12]
}
version = module.PseudoVersion(majorVersion, fullVersion, ts, version)
}
if version != "" {
main.Version = version
main.PURL = packageURL(main.Name, main.Version)
@ -122,6 +151,65 @@ func (c *goBinaryCataloger) makeGoMainPackage(resolver file.Resolver, mod *exten
return main
}
// semverPattern matches a semver-like string that is delimited by NUL bytes on both sides.
// The "version" capture group holds an optional leading "v", three dot-separated digit
// groups, and any trailing run of word/dash characters followed by word/plus characters
// (e.g. "v1.0.0-somethingelse+incompatible").
var semverPattern = regexp.MustCompile(`\x00(?P<version>v?(\d+\.\d+\.\d+[-\w]*[+\w]*))\x00`)
// findMainModuleVersion attempts to guess a version for a main module, trying each enabled
// heuristic in order and returning the first answer:
//
//  1. FromLDFlags: a full version extracted from the "-ldflags" build setting (requires
//     non-nil metadata).
//  2. FromContents: the first match of semverPattern in the binary contents, read in full
//     from the start of reader. Seek/read failures are trace-logged and the heuristic is
//     skipped. This can result in false positives.
//  3. FromBuildSettings: a go pseudo version crafted from the "vcs.revision" and "vcs.time"
//     build settings (both must be present).
//
// An empty string is returned when no heuristic produces a version.
func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuildinfoEntry, gbs pkg.KeyValues, reader io.ReadSeekCloser) string {
	revision, haveRevision := gbs.Get("vcs.revision")
	commitTime, haveCommitTime := gbs.Get("vcs.time")

	var major, full string
	if c.mainModuleVersion.FromLDFlags && metadata != nil {
		flags, _ := metadata.BuildSettings.Get("-ldflags")
		major, full = extractVersionFromLDFlags(flags)
		if full != "" {
			// a specific version was provided in the ldflags, use it as is. It is not combined
			// with a pseudo version (e.g. v1.2.3-0.20210101000000-abcdef123456) on the
			// assumption that a matching vcs tag exists for it. That may not agree with the
			// go.mod contents, but it is the logical version of the package.
			return full
		}
	}

	if c.mainModuleVersion.FromContents {
		// pattern match against the whole binary
		if _, seekErr := reader.Seek(0, io.SeekStart); seekErr != nil {
			log.WithFields("error", seekErr).Trace("unable to seek to start of go binary reader")
		} else if raw, readErr := io.ReadAll(reader); readErr != nil {
			log.WithFields("error", readErr).Trace("unable to read from go binary reader")
		} else if guessed, found := internal.MatchNamedCaptureGroups(semverPattern, string(raw))["version"]; found {
			return guessed
		}
	}

	// last resort: the go standard pseudo v0.0.0 version
	if !(c.mainModuleVersion.FromBuildSettings && haveRevision && haveCommitTime) {
		return ""
	}

	// NOTE: the parse error is ignored on purpose. On failure the zero time.Time is used, which
	// renders as an empty date (00010101000000) and keeps the result consistent with the
	// pseudo-version format: https://go.dev/ref/mod#pseudo-versions
	ts, _ := time.Parse(time.RFC3339, commitTime)

	shortRevision := revision
	if len(shortRevision) >= 12 {
		shortRevision = shortRevision[:12]
	}

	return module.PseudoVersion(major, full, ts, shortRevision)
}
func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion string) {
if ldflags == "" {
return "", ""
@ -223,43 +311,3 @@ func createMainModuleFromPath(path string) (mod debug.Module) {
mod.Version = devel
return
}
func (c *goBinaryCataloger) buildGoPkgInfo(resolver file.Resolver, location file.Location, mod *extendedBuildInfo, arch string) []pkg.Package {
var pkgs []pkg.Package
if mod == nil {
return pkgs
}
var empty debug.Module
if mod.Main == empty && mod.Path != "" {
mod.Main = createMainModuleFromPath(mod.Path)
}
for _, dep := range mod.Deps {
if dep == nil {
continue
}
p := c.newGoBinaryPackage(
resolver,
dep,
mod.Main.Path,
mod.GoVersion,
arch,
nil,
mod.cryptoSettings,
location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)
if pkg.IsValid(&p) {
pkgs = append(pkgs, p)
}
}
if mod.Main == empty {
return pkgs
}
main := c.makeGoMainPackage(resolver, mod, arch, location)
pkgs = append(pkgs, main)
return pkgs
}

View File

@ -8,6 +8,7 @@ import (
"path/filepath"
"runtime/debug"
"strconv"
"strings"
"syscall"
"testing"
@ -16,7 +17,9 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/internal/unionreader"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
// make will run the default make target for the given test fixture path
@ -163,9 +166,10 @@ func TestBuildGoPkgInfo(t *testing.T) {
}
tests := []struct {
name string
mod *extendedBuildInfo
expected []pkg.Package
name string
mod *extendedBuildInfo
expected []pkg.Package
binaryContent string
}{
{
name: "package without name",
@ -839,6 +843,69 @@ func TestBuildGoPkgInfo(t *testing.T) {
unmodifiedMain,
},
},
{
name: "parse main mod and replace devel with pattern from binary contents",
mod: &extendedBuildInfo{
BuildInfo: &debug.BuildInfo{
GoVersion: goCompiledVersion,
Main: debug.Module{Path: "github.com/anchore/syft", Version: "(devel)"},
Settings: []debug.BuildSetting{
{Key: "GOARCH", Value: archDetails},
{Key: "GOOS", Value: "darwin"},
{Key: "GOAMD64", Value: "v1"},
{Key: "vcs.time", Value: "2022-10-14T19:54:57Z"}, // important! missing revision
{Key: "-ldflags", Value: `build -ldflags="-w -s -extldflags '-static' -X blah=foobar`},
},
},
cryptoSettings: nil,
arch: archDetails,
},
binaryContent: "\x00v1.0.0-somethingelse+incompatible\x00",
expected: []pkg.Package{
{
Name: "github.com/anchore/syft",
Language: pkg.Go,
Type: pkg.GoModulePkg,
Version: "v1.0.0-somethingelse+incompatible",
PURL: "pkg:golang/github.com/anchore/syft@v1.0.0-somethingelse+incompatible",
Locations: file.NewLocationSet(
file.NewLocationFromCoordinates(
file.Coordinates{
RealPath: "/a-path",
FileSystemID: "layer-id",
},
).WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
Metadata: pkg.GolangBinaryBuildinfoEntry{
GoCompiledVersion: goCompiledVersion,
Architecture: archDetails,
BuildSettings: []pkg.KeyValue{
{
Key: "GOARCH",
Value: archDetails,
},
{
Key: "GOOS",
Value: "darwin",
},
{
Key: "GOAMD64",
Value: "v1",
},
{
Key: "vcs.time",
Value: "2022-10-14T19:54:57Z",
},
{
Key: "-ldflags",
Value: `build -ldflags="-w -s -extldflags '-static' -X blah=foobar`,
},
},
MainModule: "github.com/anchore/syft",
},
},
},
},
}
for _, test := range tests {
@ -854,9 +921,14 @@ func TestBuildGoPkgInfo(t *testing.T) {
},
)
c := goBinaryCataloger{}
pkgs := c.buildGoPkgInfo(fileresolver.Empty{}, location, test.mod, test.mod.arch)
assert.Equal(t, test.expected, pkgs)
c := newGoBinaryCataloger(DefaultCatalogerConfig())
reader, err := unionreader.GetUnionReader(io.NopCloser(strings.NewReader(test.binaryContent)))
require.NoError(t, err)
pkgs := c.buildGoPkgInfo(fileresolver.Empty{}, location, test.mod, test.mod.arch, reader)
require.Len(t, pkgs, len(test.expected))
for i, p := range pkgs {
pkgtest.AssertPackagesEqual(t, test.expected[i], p)
}
})
}
}