diff --git a/syft/cataloging/pkgcataloging/config.go b/syft/cataloging/pkgcataloging/config.go index 9cc1d9246..db029848e 100644 --- a/syft/cataloging/pkgcataloging/config.go +++ b/syft/cataloging/pkgcataloging/config.go @@ -22,7 +22,7 @@ func DefaultConfig() Config { return Config{ Binary: binary.DefaultCatalogerConfig(), Golang: golang.DefaultCatalogerConfig(), - LinuxKernel: kernel.DefaultLinuxCatalogerConfig(), + LinuxKernel: kernel.DefaultLinuxKernelCatalogerConfig(), Python: python.DefaultCatalogerConfig(), JavaArchive: java.DefaultArchiveCatalogerConfig(), } diff --git a/syft/pkg/cataloger/binary/cataloger.go b/syft/pkg/cataloger/binary/cataloger.go index 1a79469f4..7ecc2e17d 100644 --- a/syft/pkg/cataloger/binary/cataloger.go +++ b/syft/pkg/cataloger/binary/cataloger.go @@ -1,5 +1,5 @@ /* -Package binary provides a concrete Cataloger implementations for surfacing possible packages based on signatures found within binary files. +Package binary provides a concrete cataloger implementations for surfacing possible packages based on signatures found within binary files. */ package binary @@ -26,7 +26,7 @@ func DefaultCatalogerConfig() CatalogerConfig { } func NewCataloger(cfg CatalogerConfig) pkg.Cataloger { - return &Cataloger{ + return &cataloger{ classifiers: cfg.Classifiers, } } @@ -40,23 +40,23 @@ func (cfg CatalogerConfig) MarshalJSON() ([]byte, error) { return json.Marshal(names) } -// Cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, -// which have been identified by the classifiers. The Cataloger is _NOT_ a place to catalog any and every +// cataloger is the cataloger responsible for surfacing evidence of a very limited set of binary files, +// which have been identified by the classifiers. The cataloger is _NOT_ a place to catalog any and every // binary, but rather the specific set that has been curated to be important, predominantly related to toolchain- // related runtimes like Python, Go, Java, or Node. Some exceptions can be made for widely-used binaries such // as busybox. -type Cataloger struct { +type cataloger struct { classifiers []Classifier } -// Name returns a string that uniquely describes the Cataloger -func (c Cataloger) Name() string { +// Name returns a string that uniquely describes the cataloger +func (c cataloger) Name() string { return catalogerName } // Catalog is given an object to resolve file references and content, this function returns any discovered Packages // after analyzing the catalog source. -func (c Cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { +func (c cataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { var packages []pkg.Package var relationships []artifact.Relationship diff --git a/syft/pkg/cataloger/cpp/parse_conanfile.go b/syft/pkg/cataloger/cpp/parse_conanfile.go index 9f6d57d49..dc22f80bd 100644 --- a/syft/pkg/cataloger/cpp/parse_conanfile.go +++ b/syft/pkg/cataloger/cpp/parse_conanfile.go @@ -16,10 +16,6 @@ import ( var _ generic.Parser = parseConanfile -type Conanfile struct { - Requires []string `toml:"requires"` -} - // parseConanfile is a parser function for conanfile.txt contents, returning all packages discovered. func parseConanfile(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { r := bufio.NewReader(reader) diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go index 3a8555cd1..bce75ac6f 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -25,7 +25,7 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader" ) -const GOARCH = "GOARCH" +const goArch = "GOARCH" var ( // errUnrecognizedFormat is returned when a given executable file doesn't @@ -154,7 +154,7 @@ func extractVersionFromLDFlags(ldflags string) (majorVersion string, fullVersion func getGOARCH(settings []debug.BuildSetting) string { for _, s := range settings { - if s.Key == GOARCH { + if s.Key == goArch { return s.Value } } diff --git a/syft/pkg/cataloger/java/cataloger.go b/syft/pkg/cataloger/java/cataloger.go index fa8560ec4..9552b142d 100644 --- a/syft/pkg/cataloger/java/cataloger.go +++ b/syft/pkg/cataloger/java/cataloger.go @@ -9,7 +9,7 @@ import ( ) // NewArchiveCataloger returns a new Java archive cataloger object for detecting packages with archives (jar, war, ear, par, sar, jpi, hpi, and native-image formats) -func NewArchiveCataloger(cfg ArchiveCatalogerConfig) *generic.Cataloger { +func NewArchiveCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger { gap := newGenericArchiveParserAdapter(cfg) c := generic.NewCataloger("java-archive-cataloger"). diff --git a/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go b/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go index b5b0474ca..de2a0bfe3 100644 --- a/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go +++ b/syft/pkg/cataloger/java/graalvm_native_image_cataloger.go @@ -92,7 +92,7 @@ type nativeImagePE struct { header exportPrefixPE } -type NativeImageCataloger struct{} +type nativeImageCataloger struct{} const nativeImageCatalogerName = "graalvm-native-image-cataloger" const nativeImageSbomSymbol = "sbom" @@ -104,11 +104,11 @@ const nativeImageMissingExportedDataDirectoryError = "exported data directory is // NewNativeImageCataloger returns a new Native Image cataloger object. func NewNativeImageCataloger() pkg.Cataloger { - return &NativeImageCataloger{} + return &nativeImageCataloger{} } // Name returns a string that uniquely describes a native image cataloger -func (c *NativeImageCataloger) Name() string { +func (c *nativeImageCataloger) Name() string { return nativeImageCatalogerName } @@ -571,7 +571,7 @@ func fetchPkgs(reader unionreader.UnionReader, filename string) []pkg.Package { } // Catalog attempts to find any native image executables reachable from a resolver. -func (c *NativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { +func (c *nativeImageCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { var pkgs []pkg.Package fileMatches, err := resolver.FilesByMIMEType(internal.ExecutableMIMETypeSet.List()...) if err != nil { diff --git a/syft/pkg/cataloger/java/parse_gradle_lockfile.go b/syft/pkg/cataloger/java/parse_gradle_lockfile.go index 5cc842b30..a2aa11b5d 100644 --- a/syft/pkg/cataloger/java/parse_gradle_lockfile.go +++ b/syft/pkg/cataloger/java/parse_gradle_lockfile.go @@ -13,8 +13,8 @@ import ( const gradleLockfileGlob = "**/gradle.lockfile*" -// LockfileDependency represents a single dependency in the gradle.lockfile file -type LockfileDependency struct { +// lockfileDependency represents a single dependency in the gradle.lockfile file +type lockfileDependency struct { Group string Name string Version string @@ -27,7 +27,7 @@ func parseGradleLockfile(_ context.Context, _ file.Resolver, _ *generic.Environm scanner := bufio.NewScanner(reader) // Create slices to hold the dependencies and plugins - dependencies := []LockfileDependency{} + dependencies := []lockfileDependency{} // Loop over all lines in the file for scanner.Scan() { @@ -43,7 +43,7 @@ func parseGradleLockfile(_ context.Context, _ file.Resolver, _ *generic.Environm // we have a version directly specified if len(parts) == 3 { // Create a new Dependency struct and add it to the dependencies slice - dep := LockfileDependency{Group: parts[0], Name: parts[1], Version: parts[2]} + dep := lockfileDependency{Group: parts[0], Name: parts[1], Version: parts[2]} dependencies = append(dependencies, dep) } } diff --git a/syft/pkg/cataloger/kernel/cataloger.go b/syft/pkg/cataloger/kernel/cataloger.go index c02349601..d0cc4d131 100644 --- a/syft/pkg/cataloger/kernel/cataloger.go +++ b/syft/pkg/cataloger/kernel/cataloger.go @@ -15,17 +15,17 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -var _ pkg.Cataloger = (*LinuxKernelCataloger)(nil) +var _ pkg.Cataloger = (*linuxKernelCataloger)(nil) type LinuxKernelCatalogerConfig struct { CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"` } -type LinuxKernelCataloger struct { +type linuxKernelCataloger struct { cfg LinuxKernelCatalogerConfig } -func DefaultLinuxCatalogerConfig() LinuxKernelCatalogerConfig { +func DefaultLinuxKernelCatalogerConfig() LinuxKernelCatalogerConfig { return LinuxKernelCatalogerConfig{ CatalogModules: true, } @@ -45,17 +45,17 @@ var kernelModuleGlobs = []string{ } // NewLinuxKernelCataloger returns a new kernel files cataloger object. -func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) *LinuxKernelCataloger { - return &LinuxKernelCataloger{ +func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) pkg.Cataloger { + return &linuxKernelCataloger{ cfg: cfg, } } -func (l LinuxKernelCataloger) Name() string { +func (l linuxKernelCataloger) Name() string { return "linux-kernel-cataloger" } -func (l LinuxKernelCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { +func (l linuxKernelCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { var allPackages []pkg.Package var allRelationships []artifact.Relationship var errs error diff --git a/syft/pkg/cataloger/nix/cataloger.go b/syft/pkg/cataloger/nix/cataloger.go index fce31d1d7..0849be929 100644 --- a/syft/pkg/cataloger/nix/cataloger.go +++ b/syft/pkg/cataloger/nix/cataloger.go @@ -17,18 +17,18 @@ import ( const catalogerName = "nix-store-cataloger" -// StoreCataloger finds package outputs installed in the Nix store location (/nix/store/*). -type StoreCataloger struct{} +// storeCataloger finds package outputs installed in the Nix store location (/nix/store/*). +type storeCataloger struct{} func NewStoreCataloger() pkg.Cataloger { - return &StoreCataloger{} + return &storeCataloger{} } -func (c *StoreCataloger) Name() string { +func (c *storeCataloger) Name() string { return catalogerName } -func (c *StoreCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { +func (c *storeCataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { // we want to search for only directories, which isn't possible via the stereoscope API, so we need to apply the glob manually on all returned paths var pkgs []pkg.Package var filesByPath = make(map[string]*file.LocationSet) diff --git a/syft/pkg/cataloger/python/cataloger.go b/syft/pkg/cataloger/python/cataloger.go index 30f65b9a5..2f7b661f7 100644 --- a/syft/pkg/cataloger/python/cataloger.go +++ b/syft/pkg/cataloger/python/cataloger.go @@ -21,7 +21,7 @@ func DefaultCatalogerConfig() CatalogerConfig { } // NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. -func NewPackageCataloger(cfg CatalogerConfig) *generic.Cataloger { +func NewPackageCataloger(cfg CatalogerConfig) pkg.Cataloger { rqp := newRequirementsParser(cfg) return generic.NewCataloger("python-package-cataloger"). WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt"). diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock.go b/syft/pkg/cataloger/python/parse_pipfile_lock.go index bcae72bc0..7e1181f4a 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock.go @@ -29,11 +29,11 @@ type pipfileLock struct { VerifySsl bool `json:"verify_ssl"` } `json:"sources"` } `json:"_meta"` - Default map[string]Dependency `json:"default"` - Develop map[string]Dependency `json:"develop"` + Default map[string]pipfileLockDependency `json:"default"` + Develop map[string]pipfileLockDependency `json:"develop"` } -type Dependency struct { +type pipfileLockDependency struct { Hashes []string `json:"hashes"` Version string `json:"version"` Index string `json:"index"` diff --git a/test/integration/package_cataloger_convention_test.go b/test/integration/package_cataloger_convention_test.go new file mode 100644 index 000000000..a119e51bd --- /dev/null +++ b/test/integration/package_cataloger_convention_test.go @@ -0,0 +1,350 @@ +package integration + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "go/types" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/bmatcuk/doublestar/v4" + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_packageCatalogerExports(t *testing.T) { + // sanity check that we are actually finding exports + + exports := packageCatalogerExports(t) + require.NotEmpty(t, exports) + + expectAtLeast := map[string]*strset.Set{ + "golang": strset.New("NewGoModuleFileCataloger", "NewGoModuleBinaryCataloger", "CatalogerConfig", "DefaultCatalogerConfig"), + } + + for pkg, expected := range expectAtLeast { + actual, ok := exports[pkg] + require.True(t, ok, pkg) + require.True(t, expected.IsSubset(actual.Names()), pkg) + } + +} + +func Test_validatePackageCatalogerExport(t *testing.T) { + cases := []struct { + name string + export exportToken + wantErr assert.ErrorAssertionFunc + }{ + // valid... + { + name: "valid constructor", + export: exportToken{ + Name: "NewFooCataloger", + Type: "*ast.FuncType", + SignatureSize: 1, + ReturnTypeNames: []string{ + "pkg.Cataloger", + }, + }, + }, + { + name: "valid default config", + export: exportToken{ + Name: "DefaultFooConfig", + Type: "*ast.FuncType", + SignatureSize: 0, + }, + }, + { + name: "valid config", + export: exportToken{ + Name: "FooConfig", + Type: "*ast.StructType", + }, + }, + // invalid... + { + name: "constructor that returns a concrete type", + export: exportToken{ + Name: "NewFooCataloger", + Type: "*ast.FuncType", + SignatureSize: 1, + ReturnTypeNames: []string{ + "*generic.Cataloger", + }, + }, + wantErr: assert.Error, + }, + { + name: "struct with constructor name", + export: exportToken{ + Name: "NewFooCataloger", + Type: "*ast.StructType", + }, + wantErr: assert.Error, + }, + { + name: "struct with default config fn name", + export: exportToken{ + Name: "DefaultFooConfig", + Type: "*ast.StructType", + }, + wantErr: assert.Error, + }, + { + name: "fn with struct name", + export: exportToken{ + Name: "FooConfig", + Type: "*ast.FuncType", + }, + wantErr: assert.Error, + }, + { + name: "default config with parameters", + export: exportToken{ + Name: "DefaultFooConfig", + Type: "*ast.FuncType", + SignatureSize: 1, + }, + wantErr: assert.Error, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if c.wantErr == nil { + c.wantErr = assert.NoError + } + err := validatePackageCatalogerExport(t, "test", c.export) + c.wantErr(t, err) + }) + } +} + +func Test_PackageCatalogerConventions(t *testing.T) { + // look at each package in syft/pkg/cataloger... + // we want to make certain that only the following things are exported from the package: + // - function matching New*Cataloger (e.g. NewAptCataloger) + // - function matching Default*Config + // - struct matching *Config + // + // anything else that is exported should result in the test failing. + // note: this is meant to apply to things in static space, not methods on structs or within interfaces. + // + // this additionally ensures that: + // - any config struct has a Default*Config function to pair with it. + // - all cataloger constructors return pkg.Cataloger interface instead of a concrete type + + exportsPerPackage := packageCatalogerExports(t) + + //for debugging purposes... + //for pkg, exports := range exportsPerPackage { + // t.Log(pkg) + // for _, export := range exports { + // t.Logf(" %#v", export) + // } + //} + + for pkg, exports := range exportsPerPackage { + for _, export := range exports.List() { + // assert the export name is valid... + assert.NoError(t, validatePackageCatalogerExport(t, pkg, export)) + + // assert that config structs have a Default*Config functions to pair with them... + if strings.Contains(export.Name, "Config") && !strings.Contains(export.Name, "Default") { + // this is a config struct, make certain there is a pairing with a Default*Config function + assert.True(t, exports.Has("Default"+export.Name), "cataloger config struct %q in pkg %q must have a 'Default%s' function", export.Name, pkg, export.Name) + } + } + } +} + +func validatePackageCatalogerExport(t *testing.T, pkg string, export exportToken) error { + + constructorMatches, err := doublestar.Match("New*Cataloger", export.Name) + require.NoError(t, err) + + defaultConfigMatches, err := doublestar.Match("Default*Config", export.Name) + require.NoError(t, err) + + configMatches, err := doublestar.Match("*Config", export.Name) + require.NoError(t, err) + + switch { + case constructorMatches: + if !export.isFunction() { + return fmt.Errorf("constructor convention used for non-function in pkg=%q: %#v", pkg, export) + } + + returnTypes := strset.New(export.ReturnTypeNames...) + if !returnTypes.Has("pkg.Cataloger") { + return fmt.Errorf("constructor convention is to return pkg.Cataloger and not concrete types. pkg=%q constructor=%q types=%+v", pkg, export.Name, strings.Join(export.ReturnTypeNames, ",")) + } + + case defaultConfigMatches: + if !export.isFunction() { + return fmt.Errorf("default config convention used for non-function in pkg=%q: %#v", pkg, export) + } + if export.SignatureSize != 0 { + return fmt.Errorf("default config convention used for non-zero signature size in pkg=%q: %#v", pkg, export) + } + case configMatches: + if !export.isStruct() { + return fmt.Errorf("config convention used for non-struct in pkg=%q: %#v", pkg, export) + } + default: + return fmt.Errorf("unexpected export in pkg=%q: %#v", pkg, export) + } + return nil +} + +type exportToken struct { + Name string + Type string + SignatureSize int + ReturnTypeNames []string +} + +func (e exportToken) isFunction() bool { + return strings.Contains(e.Type, "ast.FuncType") +} + +func (e exportToken) isStruct() bool { + return strings.Contains(e.Type, "ast.StructType") +} + +type exportTokenSet map[string]exportToken + +func (s exportTokenSet) Names() *strset.Set { + set := strset.New() + for k := range s { + set.Add(k) + } + return set +} + +func (s exportTokenSet) Has(name string) bool { + _, ok := s[name] + return ok +} + +func (s exportTokenSet) Add(tokens ...exportToken) { + for _, t := range tokens { + if _, ok := s[t.Name]; ok { + panic("duplicate token name: " + t.Name) + } + s[t.Name] = t + } +} + +func (s exportTokenSet) Remove(names ...string) { + for _, name := range names { + delete(s, name) + } +} + +func (s exportTokenSet) List() []exportToken { + var tokens []exportToken + for _, t := range s { + tokens = append(tokens, t) + } + return tokens +} + +func packageCatalogerExports(t *testing.T) map[string]exportTokenSet { + t.Helper() + + catalogerPath := filepath.Join(repoRoot(t), "syft", "pkg", "cataloger") + + ignorePaths := []string{ + filepath.Join(catalogerPath, "common"), + filepath.Join(catalogerPath, "generic"), + } + + exportsPerPackage := make(map[string]exportTokenSet) + + err := filepath.Walk(catalogerPath, func(path string, info os.FileInfo, err error) error { + require.NoError(t, err) + + if info.IsDir() || + !strings.HasSuffix(info.Name(), ".go") || + strings.HasSuffix(info.Name(), "_test.go") || + strings.Contains(path, "test-fixtures") || + strings.Contains(path, "internal") { + return nil + } + + for _, ignorePath := range ignorePaths { + if strings.Contains(path, ignorePath) { + return nil + } + } + + fset := token.NewFileSet() + node, err := parser.ParseFile(fset, path, nil, parser.ParseComments) + require.NoError(t, err) + + pkg := node.Name.Name + for _, f := range node.Decls { + switch decl := f.(type) { + case *ast.GenDecl: + for _, spec := range decl.Specs { + switch spec := spec.(type) { + case *ast.TypeSpec: + if spec.Name.IsExported() { + if _, ok := exportsPerPackage[pkg]; !ok { + exportsPerPackage[pkg] = make(exportTokenSet) + } + exportsPerPackage[pkg].Add(exportToken{ + Name: spec.Name.Name, + Type: reflect.TypeOf(spec.Type).String(), + }) + } + } + } + case *ast.FuncDecl: + if decl.Recv == nil && decl.Name.IsExported() { + var returnTypes []string + if decl.Type.Results != nil { + for _, field := range decl.Type.Results.List { + // TODO: there is probably a better way to extract the specific type name + //ty := strings.Join(strings.Split(fmt.Sprint(field.Type), " "), ".") + ty := types.ExprString(field.Type) + + returnTypes = append(returnTypes, ty) + } + } + + if _, ok := exportsPerPackage[pkg]; !ok { + exportsPerPackage[pkg] = make(exportTokenSet) + } + exportsPerPackage[pkg].Add(exportToken{ + Name: decl.Name.Name, + Type: reflect.TypeOf(decl.Type).String(), + SignatureSize: len(decl.Type.Params.List), + ReturnTypeNames: returnTypes, + }) + } + } + } + + return nil + }) + + require.NoError(t, err) + + // remove exceptions + // these are known violations to the common convention that are allowed. + if vs, ok := exportsPerPackage["binary"]; ok { + vs.Remove("Classifier", "EvidenceMatcher", "FileContentsVersionMatcher", "DefaultClassifiers") + } + + return exportsPerPackage +}