Unexport types and functions from cataloger packages (#2530)

* unexport as many types and functions from cataloger packages as possible

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* capture type and signature information in convention test

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* check that we return pkg.Cataloger from constructors

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-01-24 16:12:46 -05:00 committed by GitHub
parent e0e1c4ba0a
commit 11c0b1c234
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 386 additions and 40 deletions

View File

@ -22,7 +22,7 @@ func DefaultConfig() Config {
return Config{ return Config{
Binary: binary.DefaultCatalogerConfig(), Binary: binary.DefaultCatalogerConfig(),
Golang: golang.DefaultCatalogerConfig(), Golang: golang.DefaultCatalogerConfig(),
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), LinuxKernel: kernel.DefaultLinuxKernelCatalogerConfig(),
Python: python.DefaultCatalogerConfig(), Python: python.DefaultCatalogerConfig(),
JavaArchive: java.DefaultArchiveCatalogerConfig(), JavaArchive: java.DefaultArchiveCatalogerConfig(),
} }

View File

@ -1,5 +1,5 @@
/* /*
Package binary provides a concrete Cataloger implementations for surfacing possible packages based on signatures found within binary files. Package binary provides a concrete cataloger implementations for surfacing possible packages based on signatures found within binary files.
*/ */
package binary package binary
@ -26,7 +26,7 @@ func DefaultCatalogerConfig() CatalogerConfig {
} }
func NewCataloger(cfg CatalogerConfig) pkg.Cataloger { func NewCataloger(cfg CatalogerConfig) pkg.Cataloger {
return &Cataloger{ return &cataloger{
classifiers: cfg.Classifiers, classifiers: cfg.Classifiers,
} }
} }
@ -40,23 +40,23 @@ func (cfg CatalogerConfig) MarshalJSON() ([]byte, error) {
return json.Marshal(names) return json.Marshal(names)
} }
// Cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, // cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files,
// which have been identified by the classifiers. The Cataloger is _NOT_ a place to catalog any and every // which have been identified by the classifiers. The cataloger is _NOT_ a place to catalog any and every
// binary, but rather the specific set that has been curated to be important, predominantly related to toolchain- // binary, but rather the specific set that has been curated to be important, predominantly related to toolchain-
// related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such // related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such
// as busybox. // as busybox.
type Cataloger struct { type cataloger struct {
classifiers []Classifier classifiers []Classifier
} }
// Name returns a string that uniquely describes the Cataloger // Name returns a string that uniquely describes the cataloger
func (c Cataloger) Name() string { func (c cataloger) Name() string {
return catalogerName return catalogerName
} }
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages // Catalog is given an object to resolve file references and content, this function returns any discovered Packages
// after analyzing the catalog source. // after analyzing the catalog source.
func (c Cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package var packages []pkg.Package
var relationships []artifact.Relationship var relationships []artifact.Relationship

View File

@ -16,10 +16,6 @@ import (
var _ generic.Parser = parseConanfile var _ generic.Parser = parseConanfile
type Conanfile struct {
Requires []string `toml:"requires"`
}
// parseConanfile is a parser function for conanfile.txt contents, returning all packages discovered. // parseConanfile is a parser function for conanfile.txt contents, returning all packages discovered.
func parseConanfile(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func parseConanfile(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
r := bufio.NewReader(reader) r := bufio.NewReader(reader)

View File

@ -25,7 +25,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
) )
const GOARCH = "GOARCH" const goArch = "GOARCH"
var ( var (
// errUnrecognizedFormat is returned when a given executable file doesn't // errUnrecognizedFormat is returned when a given executable file doesn't
@ -154,7 +154,7 @@ func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion
func getGOARCH(settings []debug.BuildSetting) string { func getGOARCH(settings []debug.BuildSetting) string {
for _, s := range settings { for _, s := range settings {
if s.Key == GOARCH { if s.Key == goArch {
return s.Value return s.Value
} }
} }

View File

@ -9,7 +9,7 @@ import (
) )
// NewArchiveCataloger returns a new Java archive cataloger object for detecting packages with archives (jar, war, ear, par, sar, jpi, hpi, and native-image formats) // NewArchiveCataloger returns a new Java archive cataloger object for detecting packages with archives (jar, war, ear, par, sar, jpi, hpi, and native-image formats)
func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger { func NewArchiveCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
gap := newGenericArchiveParserAdapter(cfg) gap := newGenericArchiveParserAdapter(cfg)
c := generic.NewCataloger("java-archive-cataloger"). c := generic.NewCataloger("java-archive-cataloger").

View File

@ -92,7 +92,7 @@ type nativeImagePE struct {
header exportPrefixPE header exportPrefixPE
} }
type NativeImageCataloger struct{} type nativeImageCataloger struct{}
const nativeImageCatalogerName = "graalvm-native-image-cataloger" const nativeImageCatalogerName = "graalvm-native-image-cataloger"
const nativeImageSbomSymbol = "sbom" const nativeImageSbomSymbol = "sbom"
@ -104,11 +104,11 @@ const nativeImageMissingExportedDataDirectoryError = "exported data directory is
// NewNativeImageCataloger returns a new Native Image cataloger object. // NewNativeImageCataloger returns a new Native Image cataloger object.
func NewNativeImageCataloger() pkg.Cataloger { func NewNativeImageCataloger() pkg.Cataloger {
return &NativeImageCataloger{} return &nativeImageCataloger{}
} }
// Name returns a string that uniquely describes a native image cataloger // Name returns a string that uniquely describes a native image cataloger
func (c *NativeImageCataloger) Name() string { func (c *nativeImageCataloger) Name() string {
return nativeImageCatalogerName return nativeImageCatalogerName
} }
@ -571,7 +571,7 @@ func fetchPkgs(reader unionreader.UnionReader, filename string) []pkg.Package {
} }
// Catalog attempts to find any native image executables reachable from a resolver. // Catalog attempts to find any native image executables reachable from a resolver.
func (c *NativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { func (c *nativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package var pkgs []pkg.Package
fileMatches, err := resolver.FilesByMIMEType(internal.ExecutableMIMETypeSet.List()...) fileMatches, err := resolver.FilesByMIMEType(internal.ExecutableMIMETypeSet.List()...)
if err != nil { if err != nil {

View File

@ -13,8 +13,8 @@ import (
const gradleLockfileGlob = "**/gradle.lockfile*" const gradleLockfileGlob = "**/gradle.lockfile*"
// LockfileDependency represents a single dependency in the gradle.lockfile file // lockfileDependency represents a single dependency in the gradle.lockfile file
type LockfileDependency struct { type lockfileDependency struct {
Group string Group string
Name string Name string
Version string Version string
@ -27,7 +27,7 @@ func parseGradleLockfile(_ context.Context, _ file.Resolver, _ *generic.Environm
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
// Create slices to hold the dependencies and plugins // Create slices to hold the dependencies and plugins
dependencies := []LockfileDependency{} dependencies := []lockfileDependency{}
// Loop over all lines in the file // Loop over all lines in the file
for scanner.Scan() { for scanner.Scan() {
@ -43,7 +43,7 @@ func parseGradleLockfile(_ context.Context, _ file.Resolver, _ *generic.Environm
// we have a version directly specified // we have a version directly specified
if len(parts) == 3 { if len(parts) == 3 {
// Create a new Dependency struct and add it to the dependencies slice // Create a new Dependency struct and add it to the dependencies slice
dep := LockfileDependency{Group: parts[0], Name: parts[1], Version: parts[2]} dep := lockfileDependency{Group: parts[0], Name: parts[1], Version: parts[2]}
dependencies = append(dependencies, dep) dependencies = append(dependencies, dep)
} }
} }

View File

@ -15,17 +15,17 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
var _ pkg.Cataloger = (*LinuxKernelCataloger)(nil) var _ pkg.Cataloger = (*linuxKernelCataloger)(nil)
type LinuxKernelCatalogerConfig struct { type LinuxKernelCatalogerConfig struct {
CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"` CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"`
} }
type LinuxKernelCataloger struct { type linuxKernelCataloger struct {
cfg LinuxKernelCatalogerConfig cfg LinuxKernelCatalogerConfig
} }
func DefaultLinuxCatalogerConfig() LinuxKernelCatalogerConfig { func DefaultLinuxKernelCatalogerConfig() LinuxKernelCatalogerConfig {
return LinuxKernelCatalogerConfig{ return LinuxKernelCatalogerConfig{
CatalogModules: true, CatalogModules: true,
} }
@ -45,17 +45,17 @@ var kernelModuleGlobs = []string{
} }
// NewLinuxKernelCataloger returns a new kernel files cataloger object. // NewLinuxKernelCataloger returns a new kernel files cataloger object.
func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) *LinuxKernelCataloger { func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) pkg.Cataloger {
return &LinuxKernelCataloger{ return &linuxKernelCataloger{
cfg: cfg, cfg: cfg,
} }
} }
func (l LinuxKernelCataloger) Name() string { func (l linuxKernelCataloger) Name() string {
return "linux-kernel-cataloger" return "linux-kernel-cataloger"
} }
func (l LinuxKernelCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { func (l linuxKernelCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
var allPackages []pkg.Package var allPackages []pkg.Package
var allRelationships []artifact.Relationship var allRelationships []artifact.Relationship
var errs error var errs error

View File

@ -17,18 +17,18 @@ import (
const catalogerName = "nix-store-cataloger" const catalogerName = "nix-store-cataloger"
// StoreCataloger finds package outputs installed in the Nix store location (/nix/store/*). // storeCataloger finds package outputs installed in the Nix store location (/nix/store/*).
type StoreCataloger struct{} type storeCataloger struct{}
func NewStoreCataloger() pkg.Cataloger { func NewStoreCataloger() pkg.Cataloger {
return &StoreCataloger{} return &storeCataloger{}
} }
func (c *StoreCataloger) Name() string { func (c *storeCataloger) Name() string {
return catalogerName return catalogerName
} }
func (c *StoreCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { func (c *storeCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
// we want to search for only directories, which isn't possible via the stereoscope API, so we need to apply the glob manually on all returned paths // we want to search for only directories, which isn't possible via the stereoscope API, so we need to apply the glob manually on all returned paths
var pkgs []pkg.Package var pkgs []pkg.Package
var filesByPath = make(map[string]*file.LocationSet) var filesByPath = make(map[string]*file.LocationSet)

View File

@ -21,7 +21,7 @@ func DefaultCatalogerConfig() CatalogerConfig {
} }
// NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. // NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
func NewPackageCataloger(cfg CatalogerConfig) *generic.Cataloger { func NewPackageCataloger(cfg CatalogerConfig) pkg.Cataloger {
rqp := newRequirementsParser(cfg) rqp := newRequirementsParser(cfg)
return generic.NewCataloger("python-package-cataloger"). return generic.NewCataloger("python-package-cataloger").
WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt"). WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt").

View File

@ -29,11 +29,11 @@ type pipfileLock struct {
VerifySsl bool `json:"verify_ssl"` VerifySsl bool `json:"verify_ssl"`
} `json:"sources"` } `json:"sources"`
} `json:"_meta"` } `json:"_meta"`
Default map[string]Dependency `json:"default"` Default map[string]pipfileLockDependency `json:"default"`
Develop map[string]Dependency `json:"develop"` Develop map[string]pipfileLockDependency `json:"develop"`
} }
type Dependency struct { type pipfileLockDependency struct {
Hashes []string `json:"hashes"` Hashes []string `json:"hashes"`
Version string `json:"version"` Version string `json:"version"`
Index string `json:"index"` Index string `json:"index"`

View File

@ -0,0 +1,350 @@
package integration
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"go/types"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"github.com/bmatcuk/doublestar/v4"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Test_packageCatalogerExports is a sanity check that export discovery is
// actually finding symbols, spot-checked against one well-understood package.
func Test_packageCatalogerExports(t *testing.T) {
	found := packageCatalogerExports(t)
	require.NotEmpty(t, found)

	// the golang package is known to export at least these identifiers
	wanted := map[string]*strset.Set{
		"golang": strset.New("NewGoModuleFileCataloger", "NewGoModuleBinaryCataloger", "CatalogerConfig", "DefaultCatalogerConfig"),
	}

	for pkgName, expectedNames := range wanted {
		tokens, present := found[pkgName]
		require.True(t, present, pkgName)
		require.True(t, expectedNames.IsSubset(tokens.Names()), pkgName)
	}
}
// Test_validatePackageCatalogerExport exercises the per-export convention
// checks with representative valid and invalid export tokens.
func Test_validatePackageCatalogerExport(t *testing.T) {
	tests := []struct {
		name    string
		export  exportToken
		wantErr assert.ErrorAssertionFunc
	}{
		// valid...
		{
			name: "valid constructor",
			export: exportToken{
				Name:          "NewFooCataloger",
				Type:          "*ast.FuncType",
				SignatureSize: 1,
				ReturnTypeNames: []string{
					"pkg.Cataloger",
				},
			},
		},
		{
			name: "valid default config",
			export: exportToken{
				Name:          "DefaultFooConfig",
				Type:          "*ast.FuncType",
				SignatureSize: 0,
			},
		},
		{
			name: "valid config",
			export: exportToken{
				Name: "FooConfig",
				Type: "*ast.StructType",
			},
		},
		// invalid...
		{
			name: "constructor that returns a concrete type",
			export: exportToken{
				Name:          "NewFooCataloger",
				Type:          "*ast.FuncType",
				SignatureSize: 1,
				ReturnTypeNames: []string{
					"*generic.Cataloger",
				},
			},
			wantErr: assert.Error,
		},
		{
			name: "struct with constructor name",
			export: exportToken{
				Name: "NewFooCataloger",
				Type: "*ast.StructType",
			},
			wantErr: assert.Error,
		},
		{
			name: "struct with default config fn name",
			export: exportToken{
				Name: "DefaultFooConfig",
				Type: "*ast.StructType",
			},
			wantErr: assert.Error,
		},
		{
			name: "fn with struct name",
			export: exportToken{
				Name: "FooConfig",
				Type: "*ast.FuncType",
			},
			wantErr: assert.Error,
		},
		{
			name: "default config with parameters",
			export: exportToken{
				Name:          "DefaultFooConfig",
				Type:          "*ast.FuncType",
				SignatureSize: 1,
			},
			wantErr: assert.Error,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// cases without an explicit expectation must pass validation
			check := tt.wantErr
			if check == nil {
				check = assert.NoError
			}
			check(t, validatePackageCatalogerExport(t, "test", tt.export))
		})
	}
}
func Test_PackageCatalogerConventions(t *testing.T) {
	// look at each package in syft/pkg/cataloger...
	// we want to make certain that only the following things are exported from the package:
	// - function matching New*Cataloger (e.g. NewAptCataloger)
	// - function matching Default*Config
	// - struct matching *Config
	//
	// anything else that is exported should result in the test failing.
	// note: this is meant to apply to things in static space, not methods on structs or within interfaces.
	//
	// this additionally ensures that:
	// - any config struct has a Default*Config function to pair with it.
	// - all cataloger constructors return pkg.Cataloger interface instead of a concrete type
	exportsPerPackage := packageCatalogerExports(t)

	for pkgName, tokens := range exportsPerPackage {
		for _, tok := range tokens.List() {
			// every export must follow one of the allowed naming/shape conventions
			assert.NoError(t, validatePackageCatalogerExport(t, pkgName, tok))

			// every config struct must be paired with a Default*Config function
			if strings.Contains(tok.Name, "Config") && !strings.Contains(tok.Name, "Default") {
				assert.True(t, tokens.Has("Default"+tok.Name), "cataloger config struct %q in pkg %q must have a 'Default%s' function", tok.Name, pkgName, tok.Name)
			}
		}
	}
}
// validatePackageCatalogerExport checks a single exported identifier against
// the cataloger package conventions: New*Cataloger must be a function
// returning pkg.Cataloger, Default*Config must be a zero-argument function,
// *Config must be a struct, and anything else is a convention violation.
func validatePackageCatalogerExport(t *testing.T, pkg string, export exportToken) error {
	isConstructorName, err := doublestar.Match("New*Cataloger", export.Name)
	require.NoError(t, err)

	isDefaultConfigName, err := doublestar.Match("Default*Config", export.Name)
	require.NoError(t, err)

	isConfigName, err := doublestar.Match("*Config", export.Name)
	require.NoError(t, err)

	// note: the Default*Config case must be evaluated before *Config, since a
	// Default*Config name also matches the *Config pattern.
	switch {
	case isConstructorName:
		if !export.isFunction() {
			return fmt.Errorf("constructor convention used for non-function in pkg=%q: %#v", pkg, export)
		}
		if !strset.New(export.ReturnTypeNames...).Has("pkg.Cataloger") {
			return fmt.Errorf("constructor convention is to return pkg.Cataloger and not concrete types. pkg=%q constructor=%q types=%+v", pkg, export.Name, strings.Join(export.ReturnTypeNames, ","))
		}
	case isDefaultConfigName:
		if !export.isFunction() {
			return fmt.Errorf("default config convention used for non-function in pkg=%q: %#v", pkg, export)
		}
		if export.SignatureSize != 0 {
			return fmt.Errorf("default config convention used for non-zero signature size in pkg=%q: %#v", pkg, export)
		}
	case isConfigName:
		if !export.isStruct() {
			return fmt.Errorf("config convention used for non-struct in pkg=%q: %#v", pkg, export)
		}
	default:
		return fmt.Errorf("unexpected export in pkg=%q: %#v", pkg, export)
	}
	return nil
}
// exportToken captures the name and rough AST shape of a single exported
// identifier, plus signature details when the identifier is a function.
type exportToken struct {
	Name            string
	Type            string
	SignatureSize   int
	ReturnTypeNames []string
}

// isFunction reports whether this token was derived from a function declaration.
func (tok exportToken) isFunction() bool {
	return strings.Contains(tok.Type, "ast.FuncType")
}

// isStruct reports whether this token was derived from a struct type declaration.
func (tok exportToken) isStruct() bool {
	return strings.Contains(tok.Type, "ast.StructType")
}
// exportTokenSet indexes exportTokens by their identifier name.
type exportTokenSet map[string]exportToken

// Names returns the set of identifier names contained in this set.
func (s exportTokenSet) Names() *strset.Set {
	names := strset.New()
	for name := range s {
		names.Add(name)
	}
	return names
}

// Has reports whether an identifier with the given name is present.
func (s exportTokenSet) Has(name string) bool {
	_, exists := s[name]
	return exists
}

// Add inserts the given tokens, panicking on a duplicate name (a duplicate
// would indicate a bug in how exports are collected).
func (s exportTokenSet) Add(tokens ...exportToken) {
	for _, tok := range tokens {
		if s.Has(tok.Name) {
			panic("duplicate token name: " + tok.Name)
		}
		s[tok.Name] = tok
	}
}

// Remove deletes any tokens with the given names (missing names are a no-op).
func (s exportTokenSet) Remove(names ...string) {
	for _, name := range names {
		delete(s, name)
	}
}

// List returns the tokens in this set in no particular order.
func (s exportTokenSet) List() []exportToken {
	var out []exportToken
	for _, tok := range s {
		out = append(out, tok)
	}
	return out
}
// packageCatalogerExports walks all production Go sources under
// syft/pkg/cataloger and returns the exported package-level identifiers
// (type declarations and functions) found in each package, keyed by package
// name. Known, allowed convention violations are stripped before returning.
func packageCatalogerExports(t *testing.T) map[string]exportTokenSet {
	t.Helper()

	catalogerPath := filepath.Join(repoRoot(t), "syft", "pkg", "cataloger")

	// packages exempt from the export conventions entirely
	ignorePaths := []string{
		filepath.Join(catalogerPath, "common"),
		filepath.Join(catalogerPath, "generic"),
	}

	exportsPerPackage := make(map[string]exportTokenSet)

	err := filepath.Walk(catalogerPath, func(path string, info os.FileInfo, err error) error {
		require.NoError(t, err)

		// only consider production Go sources: skip directories, non-Go files,
		// test files, fixtures, and internal packages.
		// NOTE(review): the "internal" / "test-fixtures" checks are substring
		// matches on the whole path, so any ancestor directory with those
		// names also excludes the file — presumably intentional, but broad.
		if info.IsDir() ||
			!strings.HasSuffix(info.Name(), ".go") ||
			strings.HasSuffix(info.Name(), "_test.go") ||
			strings.Contains(path, "test-fixtures") ||
			strings.Contains(path, "internal") {
			return nil
		}

		for _, ignorePath := range ignorePaths {
			if strings.Contains(path, ignorePath) {
				return nil
			}
		}

		// parse the file into an AST; comments are kept but not used below
		fset := token.NewFileSet()
		node, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
		require.NoError(t, err)

		pkg := node.Name.Name

		for _, f := range node.Decls {
			switch decl := f.(type) {
			case *ast.GenDecl:
				// collect exported type declarations (struct, interface, alias, ...);
				// the token's Type records the AST node type of the underlying spec
				for _, spec := range decl.Specs {
					switch spec := spec.(type) {
					case *ast.TypeSpec:
						if spec.Name.IsExported() {
							if _, ok := exportsPerPackage[pkg]; !ok {
								exportsPerPackage[pkg] = make(exportTokenSet)
							}
							exportsPerPackage[pkg].Add(exportToken{
								Name: spec.Name.Name,
								Type: reflect.TypeOf(spec.Type).String(),
							})
						}
					}
				}
			case *ast.FuncDecl:
				// collect exported package-level functions only: declarations
				// with a receiver are methods and are deliberately skipped
				if decl.Recv == nil && decl.Name.IsExported() {
					var returnTypes []string
					if decl.Type.Results != nil {
						for _, field := range decl.Type.Results.List {
							// TODO: there is probably a better way to extract the specific type name
							//ty := strings.Join(strings.Split(fmt.Sprint(field.Type), " "), ".")
							ty := types.ExprString(field.Type)
							returnTypes = append(returnTypes, ty)
						}
					}
					if _, ok := exportsPerPackage[pkg]; !ok {
						exportsPerPackage[pkg] = make(exportTokenSet)
					}
					exportsPerPackage[pkg].Add(exportToken{
						Name:            decl.Name.Name,
						Type:            reflect.TypeOf(decl.Type).String(),
						SignatureSize:   len(decl.Type.Params.List),
						ReturnTypeNames: returnTypes,
					})
				}
			}
		}
		return nil
	})
	require.NoError(t, err)

	// remove exceptions
	// these are known violations to the common convention that are allowed.
	if vs, ok := exportsPerPackage["binary"]; ok {
		vs.Remove("Classifier", "EvidenceMatcher", "FileContentsVersionMatcher", "DefaultClassifiers")
	}

	return exportsPerPackage
}