improve testing a docs

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-10-29 10:08:29 -04:00
parent 0dd906b071
commit d6512456b3
43 changed files with 4602 additions and 342 deletions

View File

@ -215,6 +215,24 @@ Interested in building a new cataloger? Checkout the [list of issues with the `n
If you have questions about implementing a cataloger feel free to file an issue or reach out to us [on discourse](https://anchore.com/discourse)! If you have questions about implementing a cataloger feel free to file an issue or reach out to us [on discourse](https://anchore.com/discourse)!
#### Documenting Cataloger Capabilities
When adding a new cataloger or changing the capabilities of an existing one, you'll need to document its capabilities in `internal/capabilities/packages.yaml`. This includes:
- What metadata types it produces
- What package types it catalogs
- What dependency information it provides (depth, edges, kinds)
- Whether it extracts license information
- How configuration affects its behavior
After implementing your cataloger:
1. **Write tests using the `pkgtest` helpers** - this automatically generates test observations that feed into capability documentation
2. **Run `make generate-capabilities`** - this regenerates the `packages.yaml` file and validates your changes
3. **Manually edit capabilities** - add the `ecosystem` field and detailed `capabilities` sections in `packages.yaml`
For detailed information about the capability documentation system, see [`internal/capabilities/generate/README.md`](internal/capabilities/generate/README.md).
#### Searching for files #### Searching for files
All catalogers are provided an instance of the [`file.Resolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these All catalogers are provided an instance of the [`file.Resolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these

View File

@ -521,6 +521,8 @@ tasks:
generate-capabilities: generate-capabilities:
desc: Generate the capabilities data file desc: Generate the capabilities data file
cmds: cmds:
# this is required to update test observations; such evidence is used to update the packages.yaml
- "go test ./syft/pkg/..."
- "go generate ./internal/capabilities/..." - "go generate ./internal/capabilities/..."
- "gofmt -s -w ./internal/capabilities" - "gofmt -s -w ./internal/capabilities"

View File

@ -77,6 +77,8 @@ func DefaultCatalog() Catalog {
Package: defaultPackageConfig(), Package: defaultPackageConfig(),
License: defaultLicenseConfig(), License: defaultLicenseConfig(),
LinuxKernel: defaultLinuxKernelConfig(), LinuxKernel: defaultLinuxKernelConfig(),
JavaScript: defaultJavaScriptConfig(),
Python: defaultPythonConfig(),
Nix: defaultNixConfig(), Nix: defaultNixConfig(),
Dotnet: defaultDotnetConfig(), Dotnet: defaultDotnetConfig(),
Golang: defaultGolangConfig(), Golang: defaultGolangConfig(),

View File

@ -1,6 +1,9 @@
package options package options
import "github.com/anchore/clio" import (
"github.com/anchore/clio"
"github.com/anchore/syft/syft/pkg/cataloger/javascript"
)
type javaScriptConfig struct { type javaScriptConfig struct {
SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
@ -12,6 +15,24 @@ var _ interface {
clio.FieldDescriber clio.FieldDescriber
} = (*javaScriptConfig)(nil) } = (*javaScriptConfig)(nil)
func defaultJavaScriptConfig() javaScriptConfig {
def := javascript.DefaultCatalogerConfig()
var includeDevDependencies *bool
if def.IncludeDevDependencies {
includeDevDependencies = &def.IncludeDevDependencies
}
var searchRemoteLicenses *bool
if def.SearchRemoteLicenses {
searchRemoteLicenses = &def.SearchRemoteLicenses
}
return javaScriptConfig{
NpmBaseURL: def.NPMBaseURL,
SearchRemoteLicenses: searchRemoteLicenses,
IncludeDevDependencies: includeDevDependencies,
}
}
func (o *javaScriptConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { func (o *javaScriptConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`) descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`)
descriptions.Add(&o.NpmBaseURL, `base NPM url to use`) descriptions.Add(&o.NpmBaseURL, `base NPM url to use`)

View File

@ -1,14 +1,18 @@
package options package options
import "github.com/anchore/clio" import (
"github.com/anchore/clio"
"github.com/anchore/syft/syft/pkg/cataloger/kernel"
)
type linuxKernelConfig struct { type linuxKernelConfig struct {
CatalogModules bool `json:"catalog-modules" yaml:"catalog-modules" mapstructure:"catalog-modules"` CatalogModules bool `json:"catalog-modules" yaml:"catalog-modules" mapstructure:"catalog-modules"`
} }
func defaultLinuxKernelConfig() linuxKernelConfig { func defaultLinuxKernelConfig() linuxKernelConfig {
def := kernel.DefaultLinuxKernelCatalogerConfig()
return linuxKernelConfig{ return linuxKernelConfig{
CatalogModules: true, CatalogModules: def.CatalogModules,
} }
} }

View File

@ -1,6 +1,9 @@
package options package options
import "github.com/anchore/clio" import (
"github.com/anchore/clio"
"github.com/anchore/syft/syft/pkg/cataloger/python"
)
type pythonConfig struct { type pythonConfig struct {
GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
@ -10,6 +13,13 @@ var _ interface {
clio.FieldDescriber clio.FieldDescriber
} = (*pythonConfig)(nil) } = (*pythonConfig)(nil)
func defaultPythonConfig() pythonConfig {
def := python.DefaultCatalogerConfig()
return pythonConfig{
GuessUnpinnedRequirements: def.GuessUnpinnedRequirements,
}
}
func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version
(e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could (e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could

View File

@ -1,3 +1,4 @@
// Package capabilities provides discovery and tracking of cataloger capabilities.
package capabilities package capabilities
import ( import (
@ -11,6 +12,8 @@ import (
"github.com/anchore/syft/internal/task" "github.com/anchore/syft/internal/task"
) )
//go:generate go run ./generate
//go:embed packages.yaml //go:embed packages.yaml
var catalogersYAML []byte var catalogersYAML []byte

View File

@ -1,11 +0,0 @@
// Package capabilities provides discovery and tracking of cataloger capabilities.
//
// Run 'go generate' in this directory to discover catalogers from source code and update
// the packages.yaml file with newly discovered generic catalogers.
//
// The packages.yaml file is the source of truth for cataloger capabilities. It contains
// both auto-generated metadata (cataloger names, parser functions, glob patterns) and
// manually-edited capability descriptions (what each cataloger can discover).
package capabilities
//go:generate go run ./generate

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
// this file links catalogers to their configuration structs by analyzing constructor function signatures to determine which config struct each cataloger uses.
package main package main
import ( import (
@ -16,8 +17,18 @@ import (
// Returns empty string for catalogers that don't take a config parameter. // Returns empty string for catalogers that don't take a config parameter.
func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) { func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) {
catalogerRoot := filepath.Join(repoRoot, "syft", "pkg", "cataloger") catalogerRoot := filepath.Join(repoRoot, "syft", "pkg", "cataloger")
return LinkCatalogersToConfigsFromPath(catalogerRoot, repoRoot)
}
// find all .go files under syft/pkg/cataloger/ recursively // LinkCatalogersToConfigsFromPath analyzes cataloger constructor functions in the specified directory
// to determine which config struct each cataloger uses. This is the parameterized version that allows
// testing with custom fixture directories.
// Returns a map where key is the cataloger name (e.g., "go-module-binary-cataloger")
// and value is the config struct reference (e.g., "golang.CatalogerConfig").
// Returns empty string for catalogers that don't take a config parameter.
// The baseRoot parameter is used for relative path calculation to determine package names.
func LinkCatalogersToConfigsFromPath(catalogerRoot, baseRoot string) (map[string]string, error) {
// find all .go files under the cataloger root recursively
var files []string var files []string
err := filepath.Walk(catalogerRoot, func(path string, info os.FileInfo, err error) error { err := filepath.Walk(catalogerRoot, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
@ -35,7 +46,7 @@ func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) {
linkages := make(map[string]string) linkages := make(map[string]string)
for _, file := range files { for _, file := range files {
links, err := linkCatalogersInFile(file, repoRoot) links, err := linkCatalogersInFile(file, baseRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse %s: %w", file, err) return nil, fmt.Errorf("failed to parse %s: %w", file, err)
} }
@ -324,7 +335,5 @@ func looksLikeConfigType(typeName string) bool {
structName := parts[len(parts)-1] structName := parts[len(parts)-1]
// check for common config patterns // check for common config patterns
return strings.Contains(structName, "Config") || return strings.Contains(structName, "Config")
strings.HasSuffix(structName, "Config") ||
strings.HasPrefix(structName, "Config")
} }

View File

@ -1,12 +1,20 @@
package main package main
import ( import (
"go/ast"
"go/parser"
"go/token"
"path/filepath"
"testing" "testing"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestLinkCatalogersToConfigs(t *testing.T) { func TestLinkCatalogersToConfigs(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
repoRoot, err := RepoRoot() repoRoot, err := RepoRoot()
require.NoError(t, err) require.NoError(t, err)
@ -124,7 +132,107 @@ func TestLinkCatalogersToConfigs(t *testing.T) {
require.GreaterOrEqual(t, len(withConfig), 6, "should find at least 6 catalogers with configs") require.GreaterOrEqual(t, len(withConfig), 6, "should find at least 6 catalogers with configs")
} }
func TestLinkCatalogersToConfigsFromPath(t *testing.T) {
tests := []struct {
name string
fixturePath string
expectedLinkages map[string]string
wantErr require.ErrorAssertionFunc
}{
{
name: "simple generic cataloger with local config",
fixturePath: "simple-generic-cataloger",
expectedLinkages: map[string]string{
"go-module-cataloger": "golang.CatalogerConfig",
},
},
{
name: "cataloger name from constant",
fixturePath: "cataloger-with-constant",
expectedLinkages: map[string]string{
"python-package-cataloger": "python.CatalogerConfig",
},
},
{
name: "custom cataloger with Name() in same file",
fixturePath: "custom-cataloger-same-file",
expectedLinkages: map[string]string{
"java-pom-cataloger": "java.ArchiveCatalogerConfig",
},
},
{
name: "custom cataloger with Name() in different file - not detected",
fixturePath: "custom-cataloger-different-file",
expectedLinkages: map[string]string{
// empty - current limitation, cannot detect cross-file Names
},
},
{
name: "cataloger without config parameter",
fixturePath: "no-config-cataloger",
expectedLinkages: map[string]string{
"javascript-cataloger": "", // empty string means no config
},
},
{
name: "imported config type",
fixturePath: "imported-config-type",
expectedLinkages: map[string]string{
"linux-kernel-cataloger": "kernel.LinuxKernelCatalogerConfig",
},
},
{
name: "non-config first parameter",
fixturePath: "non-config-first-param",
expectedLinkages: map[string]string{
"binary-cataloger": "", // Parser not a config type
},
},
{
name: "conflicting cataloger names",
fixturePath: "conflicting-names",
wantErr: require.Error,
},
{
name: "mixed naming patterns",
fixturePath: "mixed-naming-patterns",
expectedLinkages: map[string]string{
"ruby-cataloger": "ruby.Config",
},
},
{
name: "selector expression config",
fixturePath: "selector-expression-config",
expectedLinkages: map[string]string{
"rust-cataloger": "cargo.CatalogerConfig",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if tt.wantErr == nil {
tt.wantErr = require.NoError
}
fixtureDir := filepath.Join("test-fixtures", "config-linking", tt.fixturePath)
linkages, err := LinkCatalogersToConfigsFromPath(fixtureDir, fixtureDir)
tt.wantErr(t, err)
if err != nil {
return
}
require.Equal(t, tt.expectedLinkages, linkages)
})
}
}
func TestExtractConfigTypeName(t *testing.T) { func TestExtractConfigTypeName(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test in short mode")
}
tests := []struct { tests := []struct {
name string name string
catalogerName string catalogerName string
@ -240,3 +348,162 @@ func TestLooksLikeConfigType(t *testing.T) {
}) })
} }
} }
func TestExtractReceiverTypeName(t *testing.T) {
tests := []struct {
name string
receiver string // receiver code snippet
want string
}{
{
name: "value receiver",
receiver: "func (c Cataloger) Name() string { return \"\" }",
want: "Cataloger",
},
{
name: "pointer receiver",
receiver: "func (c *Cataloger) Name() string { return \"\" }",
want: "Cataloger",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// parse the function to get the receiver type
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, "", "package test\n"+tt.receiver, 0)
require.NoError(t, err)
// extract the function declaration
require.Len(t, file.Decls, 1)
funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
require.True(t, ok)
// get receiver type
var recvType ast.Expr
if funcDecl.Recv != nil && len(funcDecl.Recv.List) > 0 {
recvType = funcDecl.Recv.List[0].Type
}
got := extractReceiverTypeName(recvType)
require.Equal(t, tt.want, got)
})
}
}
func TestExtractConfigTypeNameHelper(t *testing.T) {
tests := []struct {
name string
funcSig string // function signature with parameter
localPackageName string
want string
}{
{
name: "local type",
funcSig: "func New(cfg CatalogerConfig) pkg.Cataloger { return nil }",
localPackageName: "python",
want: "python.CatalogerConfig",
},
{
name: "imported type",
funcSig: "func New(cfg java.ArchiveCatalogerConfig) pkg.Cataloger { return nil }",
localPackageName: "python",
want: "java.ArchiveCatalogerConfig",
},
{
name: "imported type - kernel package",
funcSig: "func New(cfg kernel.LinuxKernelCatalogerConfig) pkg.Cataloger { return nil }",
localPackageName: "other",
want: "kernel.LinuxKernelCatalogerConfig",
},
{
name: "no parameters",
funcSig: "func New() pkg.Cataloger { return nil }",
localPackageName: "python",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// parse the function to get parameter type
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcSig, 0)
require.NoError(t, err)
// extract the function declaration
require.Len(t, file.Decls, 1)
funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
require.True(t, ok)
// get first parameter type
var paramType ast.Expr
if funcDecl.Type.Params != nil && len(funcDecl.Type.Params.List) > 0 {
paramType = funcDecl.Type.Params.List[0].Type
}
got := extractConfigTypeName(paramType, tt.localPackageName)
require.Equal(t, tt.want, got)
})
}
}
func TestExtractReturnTypeName(t *testing.T) {
tests := []struct {
name string
funcDef string // complete function definition
want string
}{
{
name: "pointer to composite literal",
funcDef: `func New() pkg.Cataloger {
return &javaCataloger{name: "test"}
}`,
want: "javaCataloger",
},
{
name: "composite literal",
funcDef: `func New() pkg.Cataloger {
return pythonCataloger{name: "test"}
}`,
want: "pythonCataloger",
},
{
name: "variable return",
funcDef: `func New() pkg.Cataloger {
c := &Cataloger{}
return c
}`,
want: "",
},
{
name: "nil return",
funcDef: `func New() pkg.Cataloger {
return nil
}`,
want: "",
},
{
name: "empty function body",
funcDef: `func New() pkg.Cataloger {}`,
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// parse the function
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcDef, 0)
require.NoError(t, err)
// extract the function declaration
require.Len(t, file.Decls, 1)
funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
require.True(t, ok)
got := extractReturnTypeName(funcDecl)
require.Equal(t, tt.want, got)
})
}
}

View File

@ -1,3 +1,4 @@
// this file retrieves the canonical list of cataloger names and their selectors from syft's task factories.
package main package main
import ( import (

View File

@ -1,3 +1,4 @@
// this file verifies the claims made in packages.yaml against test observations and source code, ensuring cataloger capabilities are accurate and complete.
package main package main
import ( import (
@ -344,8 +345,8 @@ func TestCatalogerDataQuality(t *testing.T) {
}) })
} }
// TestRegenerateCapabilitiesDoesNotFail verifies that regeneration runs successfully // TestCapabilitiesAreUpToDate verifies that regeneration runs successfully
func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) { func TestCapabilitiesAreUpToDate(t *testing.T) {
if os.Getenv("CI") == "" { if os.Getenv("CI") == "" {
t.Skip("skipping regeneration test in local environment") t.Skip("skipping regeneration test in local environment")
} }
@ -366,9 +367,9 @@ func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) {
require.NoError(t, err, "packages.yaml has uncommitted changes after regeneration. Run 'go generate ./internal/capabilities' locally and commit the changes.") require.NoError(t, err, "packages.yaml has uncommitted changes after regeneration. Run 'go generate ./internal/capabilities' locally and commit the changes.")
} }
// TestAllCatalogersHaveObservations verifies that all catalogers have test observations, // TestCatalogersHaveTestObservations verifies that all catalogers have test observations,
// ensuring they are using the pkgtest helpers // ensuring they are using the pkgtest helpers
func TestAllCatalogersHaveObservations(t *testing.T) { func TestCatalogersHaveTestObservations(t *testing.T) {
repoRoot, err := RepoRoot() repoRoot, err := RepoRoot()
require.NoError(t, err) require.NoError(t, err)
@ -1303,3 +1304,70 @@ func TestCapabilityEvidenceFieldReferences(t *testing.T) {
}) })
} }
} }
// TestDetectorConfigFieldReferences validates that config field names referenced in detector
// conditions actually exist in the cataloger's config struct
func TestDetectorConfigFieldReferences(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors before failing
var errors []string
// check each cataloger's detectors
for _, cataloger := range doc.Catalogers {
if cataloger.Type != "custom" {
continue // only custom catalogers have detectors
}
for detectorIdx, detector := range cataloger.Detectors {
// if detector has no conditions, skip validation
if len(detector.Conditions) == 0 {
continue
}
// detector has conditions - cataloger must have a config
if cataloger.Config == "" {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct",
cataloger.Name, detectorIdx))
continue
}
// load the cataloger's config struct
configEntry, exists := doc.Configs[cataloger.Config]
if !exists {
errors = append(errors,
fmt.Sprintf("Cataloger %q references config %q which doesn't exist",
cataloger.Name, cataloger.Config))
continue
}
// build a set of valid config field names
validFields := make(map[string]bool)
for _, field := range configEntry.Fields {
validFields[field.Key] = true
}
// validate each condition
for condIdx, condition := range detector.Conditions {
for fieldName := range condition.When {
if !validFields[fieldName] {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q",
cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config))
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n"))
}
}

View File

@ -1,133 +0,0 @@
package main
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
)
func TestDiscoverConfigs(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
configs, err := DiscoverConfigs(repoRoot)
require.NoError(t, err)
// verify we discovered multiple config structs
require.NotEmpty(t, configs, "should discover at least one config struct")
// check for known config structs that have app-config annotations
expectedConfigs := []string{
"golang.CatalogerConfig",
"golang.MainModuleVersionConfig",
"java.ArchiveCatalogerConfig",
"python.CatalogerConfig",
"dotnet.CatalogerConfig",
"kernel.LinuxKernelCatalogerConfig",
"javascript.CatalogerConfig",
"nix.Config",
}
for _, expected := range expectedConfigs {
config, ok := configs[expected]
require.True(t, ok, "should discover config: %s", expected)
require.NotEmpty(t, config.Fields, "config %s should have fields", expected)
require.Equal(t, expected, config.PackageName+"."+config.StructName)
}
// verify golang.CatalogerConfig fields
golangConfig := configs["golang.CatalogerConfig"]
require.Equal(t, "golang", golangConfig.PackageName)
require.Equal(t, "CatalogerConfig", golangConfig.StructName)
require.NotEmpty(t, golangConfig.Fields)
// check for specific field
var foundSearchLocalModCache bool
for _, field := range golangConfig.Fields {
if field.Name == "SearchLocalModCacheLicenses" {
foundSearchLocalModCache = true
require.Equal(t, "bool", field.Type)
require.Equal(t, "golang.search-local-mod-cache-licenses", field.AppKey)
require.NotEmpty(t, field.Description)
require.Contains(t, field.Description, "searching for go package licenses")
}
}
require.True(t, foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field")
// verify nested config struct
golangMainModuleConfig := configs["golang.MainModuleVersionConfig"]
require.Equal(t, "golang", golangMainModuleConfig.PackageName)
require.Equal(t, "MainModuleVersionConfig", golangMainModuleConfig.StructName)
require.NotEmpty(t, golangMainModuleConfig.Fields)
// check for specific nested field
var foundFromLDFlags bool
for _, field := range golangMainModuleConfig.Fields {
if field.Name == "FromLDFlags" {
foundFromLDFlags = true
require.Equal(t, "bool", field.Type)
require.Equal(t, "golang.main-module-version.from-ld-flags", field.AppKey)
require.NotEmpty(t, field.Description)
}
}
require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig")
// print summary for manual inspection
t.Logf("Discovered %d config structs:", len(configs))
for key, config := range configs {
t.Logf(" %s: %d fields", key, len(config.Fields))
for _, field := range config.Fields {
t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey)
if diff := cmp.Diff("", field.Description); diff == "" {
t.Logf(" WARNING: field %s has no description", field.Name)
}
}
}
}
func TestExtractPackageNameFromPath(t *testing.T) {
tests := []struct {
name string
filePath string
want string
}{
{
name: "golang package",
filePath: "syft/pkg/cataloger/golang/config.go",
want: "golang",
},
{
name: "java package",
filePath: "syft/pkg/cataloger/java/config.go",
want: "java",
},
{
name: "python cataloger",
filePath: "syft/pkg/cataloger/python/cataloger.go",
want: "python",
},
{
name: "kernel cataloger",
filePath: "syft/pkg/cataloger/kernel/cataloger.go",
want: "kernel",
},
{
name: "binary classifier",
filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go",
want: "binary",
},
{
name: "not a cataloger path",
filePath: "syft/pkg/other/file.go",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := extractPackageNameFromPath(tt.filePath)
require.Equal(t, tt.want, got)
})
}
}

View File

@ -1,77 +0,0 @@
package main
import (
"fmt"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestDetectorConfigFieldReferences validates that config field names referenced in detector
// conditions actually exist in the cataloger's config struct
func TestDetectorConfigFieldReferences(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors before failing
var errors []string
// check each cataloger's detectors
for _, cataloger := range doc.Catalogers {
if cataloger.Type != "custom" {
continue // only custom catalogers have detectors
}
for detectorIdx, detector := range cataloger.Detectors {
// if detector has no conditions, skip validation
if len(detector.Conditions) == 0 {
continue
}
// detector has conditions - cataloger must have a config
if cataloger.Config == "" {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct",
cataloger.Name, detectorIdx))
continue
}
// load the cataloger's config struct
configEntry, exists := doc.Configs[cataloger.Config]
if !exists {
errors = append(errors,
fmt.Sprintf("Cataloger %q references config %q which doesn't exist",
cataloger.Name, cataloger.Config))
continue
}
// build a set of valid config field names
validFields := make(map[string]bool)
for _, field := range configEntry.Fields {
validFields[field.Key] = true
}
// validate each condition
for condIdx, condition := range detector.Conditions {
for fieldName := range condition.When {
if !validFields[fieldName] {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q",
cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config))
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n"))
}
}

View File

@ -1,3 +1,4 @@
// this file discovers application-level configuration from cmd/syft/internal/options/ by parsing ecosystem config structs, their DescribeFields() methods, and default value functions.
package main package main
import ( import (
@ -5,6 +6,7 @@ import (
"go/ast" "go/ast"
"go/parser" "go/parser"
"go/token" "go/token"
"os"
"path/filepath" "path/filepath"
"reflect" "reflect"
"sort" "sort"
@ -18,30 +20,214 @@ type AppConfigField struct {
DefaultValue interface{} // extracted from Default*() functions DefaultValue interface{} // extracted from Default*() functions
} }
// extractEcosystemConfigFieldsFromCatalog parses catalog.go and extracts the ecosystem-specific
// config fields from the Catalog struct, returning a map of struct type name to YAML tag
func extractEcosystemConfigFieldsFromCatalog(catalogFilePath string) (map[string]string, error) {
fset := token.NewFileSet()
f, err := parser.ParseFile(fset, catalogFilePath, nil, parser.ParseComments)
if err != nil {
return nil, fmt.Errorf("failed to parse catalog.go: %w", err)
}
// find the Catalog struct
catalogStruct := findConfigStruct(f, "Catalog")
if catalogStruct == nil {
return nil, fmt.Errorf("catalog struct not found in %s", catalogFilePath)
}
// extract ecosystem config fields from the Catalog struct
// these are between the "ecosystem-specific cataloger configuration" comment and the next section
ecosystemConfigs := make(map[string]string)
inEcosystemSection := false
for _, field := range catalogStruct.Fields.List {
// check for ecosystem section marker comment
if field.Doc != nil {
for _, comment := range field.Doc.List {
if strings.Contains(comment.Text, "ecosystem-specific cataloger configuration") {
inEcosystemSection = true
break
}
// check if we've hit the next section (any comment marking a new section)
if inEcosystemSection && strings.HasPrefix(comment.Text, "// configuration for") {
inEcosystemSection = false
break
}
}
}
if !inEcosystemSection {
continue
}
// extract field type and yaml tag
if len(field.Names) == 0 {
continue
}
// get the type name (e.g., "golangConfig")
var typeName string
if ident, ok := field.Type.(*ast.Ident); ok {
typeName = ident.Name
} else {
continue
}
// get the yaml tag
yamlTag := extractYAMLTag(field)
if yamlTag == "" || yamlTag == "-" {
continue
}
ecosystemConfigs[typeName] = yamlTag
}
return ecosystemConfigs, nil
}
// findFilesWithCatalogerImports scans the options directory for .go files that import
// from "github.com/anchore/syft/syft/pkg/cataloger/*" packages
func findFilesWithCatalogerImports(optionsDir string) ([]string, error) {
entries, err := os.ReadDir(optionsDir)
if err != nil {
return nil, fmt.Errorf("failed to read options directory: %w", err)
}
var candidateFiles []string
for _, entry := range entries {
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".go") {
continue
}
filePath := filepath.Join(optionsDir, entry.Name())
// parse the file to check imports
fset := token.NewFileSet()
f, err := parser.ParseFile(fset, filePath, nil, parser.ImportsOnly)
if err != nil {
continue // skip files that can't be parsed
}
// check if file imports from cataloger packages
for _, imp := range f.Imports {
importPath := strings.Trim(imp.Path.Value, `"`)
if strings.HasPrefix(importPath, "github.com/anchore/syft/syft/pkg/cataloger/") {
candidateFiles = append(candidateFiles, filePath)
break
}
}
}
return candidateFiles, nil
}
// extractConfigStructTypes parses a Go file and returns all struct type names defined in it
func extractConfigStructTypes(filePath string) ([]string, error) {
fset := token.NewFileSet()
f, err := parser.ParseFile(fset, filePath, nil, 0)
if err != nil {
return nil, fmt.Errorf("failed to parse %s: %w", filePath, err)
}
var structTypes []string
for _, decl := range f.Decls {
genDecl, ok := decl.(*ast.GenDecl)
if !ok || genDecl.Tok != token.TYPE {
continue
}
for _, spec := range genDecl.Specs {
typeSpec, ok := spec.(*ast.TypeSpec)
if !ok {
continue
}
// check if it's a struct type
if _, ok := typeSpec.Type.(*ast.StructType); ok {
structTypes = append(structTypes, typeSpec.Name.Name)
}
}
}
return structTypes, nil
}
// discoverCatalogerConfigs discovers cataloger config files by:
// 1. Finding files with cataloger imports in options directory
// 2. Extracting ecosystem config fields from Catalog struct
// 3. Matching file structs against Catalog fields
// Returns a map of file path to top-level YAML key.
func discoverCatalogerConfigs(repoRoot string) (map[string]string, error) {
	optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options")

	// map of config struct name -> top-level YAML key, read from the Catalog struct
	ecosystemConfigs, err := extractEcosystemConfigFieldsFromCatalog(filepath.Join(optionsDir, "catalog.go"))
	if err != nil {
		return nil, err
	}
	if len(ecosystemConfigs) == 0 {
		return nil, fmt.Errorf("no ecosystem config fields found in Catalog struct")
	}

	// candidate option files are those importing cataloger packages
	candidateFiles, err := findFilesWithCatalogerImports(optionsDir)
	if err != nil {
		return nil, err
	}

	// associate each candidate file with the YAML key of the first matching struct it defines
	fileToKey := make(map[string]string)
	matched := make(map[string]bool)
	for _, candidate := range candidateFiles {
		structNames, err := extractConfigStructTypes(candidate)
		if err != nil {
			return nil, err
		}
		for _, structName := range structNames {
			yamlKey, known := ecosystemConfigs[structName]
			if !known {
				continue
			}
			fileToKey[candidate] = yamlKey
			matched[structName] = true
			break
		}
	}

	// every ecosystem config declared on Catalog must have been matched to some file
	var missing []string
	for structName := range ecosystemConfigs {
		if !matched[structName] {
			missing = append(missing, structName)
		}
	}
	if len(missing) > 0 {
		sort.Strings(missing)
		return nil, fmt.Errorf("could not find files for ecosystem configs: %s", strings.Join(missing, ", "))
	}
	return fileToKey, nil
}
// DiscoverAppConfigs discovers all application-level cataloger configuration fields // DiscoverAppConfigs discovers all application-level cataloger configuration fields
// from the options package // from the options package
func DiscoverAppConfigs(repoRoot string) ([]AppConfigField, error) { func DiscoverAppConfigs(repoRoot string) ([]AppConfigField, error) {
optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options") // discover cataloger config files dynamically
configFiles, err := discoverCatalogerConfigs(repoRoot)
// parse all .go files in the options directory to extract configuration fields if err != nil {
configs := []AppConfigField{} return nil, fmt.Errorf("failed to discover cataloger configs: %w", err)
// define the config files we want to parse with their top-level keys
configFiles := map[string]string{
"dotnet.go": "dotnet",
"golang.go": "golang",
"java.go": "java",
"javascript.go": "javascript",
"linux_kernel.go": "linux-kernel",
"nix.go": "nix",
"python.go": "python",
} }
for filename, topLevelKey := range configFiles { // extract configuration fields from each discovered file
filePath := filepath.Join(optionsDir, filename) var configs []AppConfigField
for filePath, topLevelKey := range configFiles {
fields, err := extractAppConfigFields(filePath, topLevelKey) fields, err := extractAppConfigFields(filePath, topLevelKey)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to extract config from %s: %w", filename, err) return nil, fmt.Errorf("failed to extract config from %s: %w", filePath, err)
} }
configs = append(configs, fields...) configs = append(configs, fields...)
} }

View File

@ -0,0 +1,413 @@
package main
import (
"go/ast"
"go/parser"
"go/token"
"testing"
"github.com/stretchr/testify/require"
)
func TestDetermineExpectedConfigName(t *testing.T) {
tests := []struct {
name string
topLevelKey string
wantName string
}{
{
name: "linux-kernel special case",
topLevelKey: "linux-kernel",
wantName: "linuxKernelConfig",
},
{
name: "javascript special case",
topLevelKey: "javascript",
wantName: "javaScriptConfig",
},
{
name: "standard config golang",
topLevelKey: "golang",
wantName: "golangConfig",
},
{
name: "standard config python",
topLevelKey: "python",
wantName: "pythonConfig",
},
{
name: "standard config java",
topLevelKey: "java",
wantName: "javaConfig",
},
{
name: "standard config dotnet",
topLevelKey: "dotnet",
wantName: "dotnetConfig",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := determineExpectedConfigName(tt.topLevelKey)
require.Equal(t, tt.wantName, got)
})
}
}
func TestCleanDescription(t *testing.T) {
tests := []struct {
name string
desc string
want string
}{
{
name: "single line no extra whitespace",
desc: "this is a description",
want: "this is a description",
},
{
name: "multiple spaces collapsed",
desc: "this has multiple spaces",
want: "this has multiple spaces",
},
{
name: "multi-line description",
desc: "this is a\nmulti-line\ndescription",
want: "this is a multi-line description",
},
{
name: "leading and trailing whitespace",
desc: " \t description with spaces \t ",
want: "description with spaces",
},
{
name: "tabs and newlines",
desc: "description\t\twith\n\ttabs",
want: "description with tabs",
},
{
name: "empty string",
desc: "",
want: "",
},
{
name: "only whitespace",
desc: " \n\t ",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := cleanDescription(tt.desc)
require.Equal(t, tt.want, got)
})
}
}
func TestExtractYAMLTag(t *testing.T) {
tests := []struct {
name string
tagStr string
want string
}{
{
name: "simple yaml tag",
tagStr: "`yaml:\"field-name\"`",
want: "field-name",
},
{
name: "yaml tag with omitempty",
tagStr: "`yaml:\"field-name,omitempty\"`",
want: "field-name",
},
{
name: "yaml tag with multiple options",
tagStr: "`yaml:\"field-name,omitempty,inline\"`",
want: "field-name",
},
{
name: "yaml tag dash means skip",
tagStr: "`yaml:\"-\"`",
want: "-",
},
{
name: "no yaml tag",
tagStr: "`json:\"field-name\"`",
want: "",
},
{
name: "empty tag",
tagStr: "",
want: "",
},
{
name: "yaml tag with json tag",
tagStr: "`yaml:\"yaml-name\" json:\"json-name\"`",
want: "yaml-name",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// construct a minimal ast.Field with the tag
field := &ast.Field{}
if tt.tagStr != "" {
field.Tag = &ast.BasicLit{
Kind: token.STRING,
Value: tt.tagStr,
}
}
got := extractYAMLTag(field)
require.Equal(t, tt.want, got)
})
}
}
func TestIsNestedStruct(t *testing.T) {
tests := []struct {
name string
expr ast.Expr
want bool
}{
{
name: "custom struct type",
expr: &ast.Ident{Name: "MainModuleVersion"},
want: true,
},
{
name: "string type",
expr: &ast.Ident{Name: "string"},
want: false,
},
{
name: "int type",
expr: &ast.Ident{Name: "int"},
want: false,
},
{
name: "bool type",
expr: &ast.Ident{Name: "bool"},
want: false,
},
{
name: "pointer type",
expr: &ast.StarExpr{X: &ast.Ident{Name: "Config"}},
want: false,
},
{
name: "array type",
expr: &ast.ArrayType{Elt: &ast.Ident{Name: "string"}},
want: false,
},
{
name: "map type",
expr: &ast.MapType{
Key: &ast.Ident{Name: "string"},
Value: &ast.Ident{Name: "string"},
},
want: false,
},
{
name: "int32 type",
expr: &ast.Ident{Name: "int32"},
want: false,
},
{
name: "uint64 type",
expr: &ast.Ident{Name: "uint64"},
want: false,
},
{
name: "float64 type",
expr: &ast.Ident{Name: "float64"},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isNestedStruct(tt.expr)
require.Equal(t, tt.want, got)
})
}
}
func TestExtractStringLiteral(t *testing.T) {
tests := []struct {
name string
expr ast.Expr
want string
}{
{
name: "double quoted string",
expr: &ast.BasicLit{
Kind: token.STRING,
Value: `"hello world"`,
},
want: "hello world",
},
{
name: "backtick string",
expr: &ast.BasicLit{
Kind: token.STRING,
Value: "`hello world`",
},
want: "hello world",
},
{
name: "empty string",
expr: &ast.BasicLit{
Kind: token.STRING,
Value: `""`,
},
want: "",
},
{
name: "string with spaces",
expr: &ast.BasicLit{
Kind: token.STRING,
Value: `" spaces "`,
},
want: " spaces ",
},
{
name: "not a string literal (int)",
expr: &ast.BasicLit{
Kind: token.INT,
Value: "42",
},
want: "",
},
{
name: "not a basic lit",
expr: &ast.Ident{Name: "someVar"},
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := extractStringLiteral(tt.expr)
require.Equal(t, tt.want, got)
})
}
}
// TestExtractFieldPathFromRef verifies that a field reference expression such as
// &o.Parent.Field is reduced to the dotted field path without the receiver.
func TestExtractFieldPathFromRef(t *testing.T) {
	cases := []struct{ name, src, want string }{
		{"simple field reference", "&o.Field", "Field"},
		{"nested field reference", "&o.Parent.Field", "Parent.Field"},
		{"deeply nested field reference", "&o.MainModuleVersion.FromLDFlags", "MainModuleVersion.FromLDFlags"},
		{"three levels deep", "&o.Level1.Level2.Level3", "Level1.Level2.Level3"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// parse the source text into an expression before extraction
			expr, err := parser.ParseExpr(tc.src)
			require.NoError(t, err)
			require.Equal(t, tc.want, extractFieldPathFromRef(expr))
		})
	}
}
// TestExtractAppValue verifies extraction of Go literal expressions into plain values:
// per the table, string contents are unquoted, numeric literals are kept as strings,
// and booleans/nil map to their Go values.
func TestExtractAppValue(t *testing.T) {
	cases := []struct {
		name string
		src  string
		want interface{}
	}{
		{name: "string literal", src: `"hello"`, want: "hello"},
		{name: "int literal", src: "42", want: "42"},
		{name: "float literal", src: "3.14", want: "3.14"},
		{name: "bool true", src: "true", want: true},
		{name: "bool false", src: "false", want: false},
		{name: "nil value", src: "nil", want: nil},
		{name: "empty string", src: `""`, want: ""},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			expr, err := parser.ParseExpr(tc.src)
			require.NoError(t, err)
			require.Equal(t, tc.want, extractAppValue(expr))
		})
	}
}
// TestExtractAppValue_NestedStruct verifies that a struct composite literal is extracted
// into a map of field name to value (covered separately since the result is a map).
func TestExtractAppValue_NestedStruct(t *testing.T) {
	expr, err := parser.ParseExpr(`struct{Field1 string; Field2 bool}{Field1: "value", Field2: true}`)
	require.NoError(t, err)

	lit, isComposite := expr.(*ast.CompositeLit)
	require.True(t, isComposite)

	result, isMap := extractAppValue(lit).(map[string]interface{})
	require.True(t, isMap)
	require.Equal(t, "value", result["Field1"])
	require.Equal(t, true, result["Field2"])
}

View File

@ -1,3 +1,4 @@
// this file discovers cataloger configuration structs using AST parsing to find Config structs and extract fields with app-config annotations.
package main package main
import ( import (
@ -135,9 +136,7 @@ func discoverConfigsInFile(path, repoRoot string) (map[string]ConfigInfo, error)
// isConfigStruct determines if a struct name looks like a configuration struct // isConfigStruct determines if a struct name looks like a configuration struct
func isConfigStruct(name string) bool { func isConfigStruct(name string) bool {
// check for common config patterns // check for common config patterns
return strings.Contains(name, "Config") || return strings.Contains(name, "Config")
strings.HasSuffix(name, "Config") ||
strings.HasPrefix(name, "Config")
} }
// extractCatalogerConfigFields parses struct fields and extracts their metadata // extractCatalogerConfigFields parses struct fields and extracts their metadata

View File

@ -0,0 +1,455 @@
package main
import (
"go/ast"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/require"
)
// expected config structs that should be discovered with app-config annotations,
// keyed as "package.StructName"; extend this list when a new ecosystem cataloger
// config gains app-config annotated fields
var expectedCatalogConfigs = []string{
	"golang.CatalogerConfig",
	"golang.MainModuleVersionConfig",
	"java.ArchiveCatalogerConfig",
	"python.CatalogerConfig",
	"dotnet.CatalogerConfig",
	"kernel.LinuxKernelCatalogerConfig",
	"javascript.CatalogerConfig",
	"nix.Config",
}
// TestDiscoverConfigs is an integration-style test: it runs config discovery against the
// real repository and spot-checks known structs, fields, descriptions, and app keys.
func TestDiscoverConfigs(t *testing.T) {
	repoRoot, err := RepoRoot()
	require.NoError(t, err)
	configs, err := DiscoverConfigs(repoRoot)
	require.NoError(t, err)
	// verify we discovered multiple config structs
	require.NotEmpty(t, configs, "should discover at least one config struct")
	// check for known config structs that have app-config annotations
	for _, expected := range expectedCatalogConfigs {
		config, ok := configs[expected]
		require.True(t, ok, "should discover config: %s", expected)
		require.NotEmpty(t, config.Fields, "config %s should have fields", expected)
		require.Equal(t, expected, config.PackageName+"."+config.StructName)
	}
	// verify golang.CatalogerConfig structure
	golangConfig := configs["golang.CatalogerConfig"]
	wantGolangConfig := ConfigInfo{
		PackageName: "golang",
		StructName:  "CatalogerConfig",
	}
	if diff := cmp.Diff(wantGolangConfig.PackageName, golangConfig.PackageName); diff != "" {
		t.Errorf("golang.CatalogerConfig.PackageName mismatch (-want +got):\n%s", diff)
	}
	if diff := cmp.Diff(wantGolangConfig.StructName, golangConfig.StructName); diff != "" {
		t.Errorf("golang.CatalogerConfig.StructName mismatch (-want +got):\n%s", diff)
	}
	require.NotEmpty(t, golangConfig.Fields)
	// check for specific field
	var foundSearchLocalModCache bool
	for _, field := range golangConfig.Fields {
		if field.Name == "SearchLocalModCacheLicenses" {
			foundSearchLocalModCache = true
			wantField := ConfigField{
				Name:   "SearchLocalModCacheLicenses",
				Type:   "bool",
				AppKey: "golang.search-local-mod-cache-licenses",
			}
			if diff := cmp.Diff(wantField.Name, field.Name); diff != "" {
				t.Errorf("SearchLocalModCacheLicenses field Name mismatch (-want +got):\n%s", diff)
			}
			if diff := cmp.Diff(wantField.Type, field.Type); diff != "" {
				t.Errorf("SearchLocalModCacheLicenses field Type mismatch (-want +got):\n%s", diff)
			}
			if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" {
				t.Errorf("SearchLocalModCacheLicenses field AppKey mismatch (-want +got):\n%s", diff)
			}
			require.NotEmpty(t, field.Description)
			require.Contains(t, field.Description, "searching for go package licenses")
		}
	}
	require.True(t, foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field")
	// verify nested config struct
	golangMainModuleConfig := configs["golang.MainModuleVersionConfig"]
	wantMainModuleConfig := ConfigInfo{
		PackageName: "golang",
		StructName:  "MainModuleVersionConfig",
	}
	if diff := cmp.Diff(wantMainModuleConfig.PackageName, golangMainModuleConfig.PackageName); diff != "" {
		t.Errorf("golang.MainModuleVersionConfig.PackageName mismatch (-want +got):\n%s", diff)
	}
	if diff := cmp.Diff(wantMainModuleConfig.StructName, golangMainModuleConfig.StructName); diff != "" {
		t.Errorf("golang.MainModuleVersionConfig.StructName mismatch (-want +got):\n%s", diff)
	}
	require.NotEmpty(t, golangMainModuleConfig.Fields)
	// check for specific nested field
	var foundFromLDFlags bool
	for _, field := range golangMainModuleConfig.Fields {
		if field.Name == "FromLDFlags" {
			foundFromLDFlags = true
			wantField := ConfigField{
				Name:   "FromLDFlags",
				Type:   "bool",
				AppKey: "golang.main-module-version.from-ld-flags",
			}
			if diff := cmp.Diff(wantField.Name, field.Name); diff != "" {
				t.Errorf("FromLDFlags field Name mismatch (-want +got):\n%s", diff)
			}
			if diff := cmp.Diff(wantField.Type, field.Type); diff != "" {
				t.Errorf("FromLDFlags field Type mismatch (-want +got):\n%s", diff)
			}
			if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" {
				t.Errorf("FromLDFlags field AppKey mismatch (-want +got):\n%s", diff)
			}
			require.NotEmpty(t, field.Description)
		}
	}
	require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig")
	// print summary for manual inspection
	t.Logf("Discovered %d config structs:", len(configs))
	for key, config := range configs {
		t.Logf(" %s: %d fields", key, len(config.Fields))
		for _, field := range config.Fields {
			t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey)
			// cmp.Diff returns "" when equal, so this fires when the description IS empty
			if diff := cmp.Diff("", field.Description); diff == "" {
				t.Logf(" WARNING: field %s has no description", field.Name)
			}
		}
	}
}
func TestExtractPackageNameFromPath(t *testing.T) {
tests := []struct {
name string
filePath string
want string
}{
{
name: "golang package",
filePath: "syft/pkg/cataloger/golang/config.go",
want: "golang",
},
{
name: "java package",
filePath: "syft/pkg/cataloger/java/config.go",
want: "java",
},
{
name: "python cataloger",
filePath: "syft/pkg/cataloger/python/cataloger.go",
want: "python",
},
{
name: "kernel cataloger",
filePath: "syft/pkg/cataloger/kernel/cataloger.go",
want: "kernel",
},
{
name: "binary classifier",
filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go",
want: "binary",
},
{
name: "not a cataloger path",
filePath: "syft/pkg/other/file.go",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := extractPackageNameFromPath(tt.filePath)
require.Equal(t, tt.want, got)
})
}
}
func TestFormatFieldType(t *testing.T) {
tests := []struct {
name string
expr ast.Expr
want string
}{
{
name: "basic identifier - string",
expr: &ast.Ident{Name: "string"},
want: "string",
},
{
name: "basic identifier - bool",
expr: &ast.Ident{Name: "bool"},
want: "bool",
},
{
name: "basic identifier - int",
expr: &ast.Ident{Name: "int"},
want: "int",
},
{
name: "selector expression - package.Type",
expr: &ast.SelectorExpr{
X: &ast.Ident{Name: "time"},
Sel: &ast.Ident{Name: "Time"},
},
want: "time.Time",
},
{
name: "selector expression - cataloging.Config",
expr: &ast.SelectorExpr{
X: &ast.Ident{Name: "cataloging"},
Sel: &ast.Ident{Name: "ArchiveSearchConfig"},
},
want: "cataloging.ArchiveSearchConfig",
},
{
name: "array of strings",
expr: &ast.ArrayType{
Elt: &ast.Ident{Name: "string"},
},
want: "[]string",
},
{
name: "array of ints",
expr: &ast.ArrayType{
Elt: &ast.Ident{Name: "int"},
},
want: "[]int",
},
{
name: "map[string]bool",
expr: &ast.MapType{
Key: &ast.Ident{Name: "string"},
Value: &ast.Ident{Name: "bool"},
},
want: "map[string]bool",
},
{
name: "map[string]int",
expr: &ast.MapType{
Key: &ast.Ident{Name: "string"},
Value: &ast.Ident{Name: "int"},
},
want: "map[string]int",
},
{
name: "pointer to type",
expr: &ast.StarExpr{
X: &ast.Ident{Name: "Config"},
},
want: "*Config",
},
{
name: "pointer to selector",
expr: &ast.StarExpr{
X: &ast.SelectorExpr{
X: &ast.Ident{Name: "time"},
Sel: &ast.Ident{Name: "Time"},
},
},
want: "*time.Time",
},
{
name: "interface{}",
expr: &ast.InterfaceType{
Methods: &ast.FieldList{},
},
want: "interface{}",
},
{
name: "nested array of arrays",
expr: &ast.ArrayType{
Elt: &ast.ArrayType{
Elt: &ast.Ident{Name: "string"},
},
},
want: "[][]string",
},
{
name: "map with array value",
expr: &ast.MapType{
Key: &ast.Ident{Name: "string"},
Value: &ast.ArrayType{
Elt: &ast.Ident{Name: "int"},
},
},
want: "map[string][]int",
},
{
name: "pointer to array",
expr: &ast.StarExpr{
X: &ast.ArrayType{
Elt: &ast.Ident{Name: "string"},
},
},
want: "*[]string",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := formatFieldType(tt.expr)
require.Equal(t, tt.want, got)
})
}
}
// TestExtractFieldComments verifies that a field's comment group is split into a human
// description and the value of its "app-config:" annotation, in either order.
func TestExtractFieldComments(t *testing.T) {
	// mkGroup builds a comment group from raw comment lines (always non-nil, possibly empty)
	mkGroup := func(lines ...string) *ast.CommentGroup {
		group := &ast.CommentGroup{List: []*ast.Comment{}}
		for _, line := range lines {
			group.List = append(group.List, &ast.Comment{Text: line})
		}
		return group
	}
	cases := []struct {
		name     string
		group    *ast.CommentGroup
		wantDesc string
		wantKey  string
	}{
		{name: "nil comment group", group: nil, wantDesc: "", wantKey: ""},
		{name: "empty comment group", group: mkGroup(), wantDesc: "", wantKey: ""},
		{name: "app-config annotation only", group: mkGroup("// app-config: golang.search-local-mod-cache-licenses"), wantDesc: "", wantKey: "golang.search-local-mod-cache-licenses"},
		{name: "description only", group: mkGroup("// enable searching for go package licenses in the local mod cache"), wantDesc: "enable searching for go package licenses in the local mod cache", wantKey: ""},
		{name: "description and app-config", group: mkGroup("// enable searching for go package licenses in the local mod cache", "// app-config: golang.search-local-mod-cache-licenses"), wantDesc: "enable searching for go package licenses in the local mod cache", wantKey: "golang.search-local-mod-cache-licenses"},
		{name: "app-config before description", group: mkGroup("// app-config: golang.search-local-mod-cache-licenses", "// enable searching for go package licenses in the local mod cache"), wantDesc: "enable searching for go package licenses in the local mod cache", wantKey: "golang.search-local-mod-cache-licenses"},
		{name: "multi-line description", group: mkGroup("// this is the first line of the description.", "// this is the second line of the description.", "// app-config: test.multi-line"), wantDesc: "this is the first line of the description. this is the second line of the description.", wantKey: "test.multi-line"},
		{name: "app-config with extra whitespace", group: mkGroup("// app-config: golang.test-key "), wantDesc: "", wantKey: "golang.test-key"},
		{name: "description with special characters", group: mkGroup("// enable searching for Go's package licenses (*.mod files)", "// app-config: golang.search"), wantDesc: "enable searching for Go's package licenses (*.mod files)", wantKey: "golang.search"},
		{name: "comment with empty lines", group: mkGroup("// first line", "//", "// second line", "// app-config: test.key"), wantDesc: "first line second line", wantKey: "test.key"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotDesc, gotKey := extractFieldComments(tc.group)
			require.Equal(t, tc.wantDesc, gotDesc)
			require.Equal(t, tc.wantKey, gotKey)
		})
	}
}
// TestDiscoverAllowedConfigStructs verifies discovery of the config types referenced by
// pkgcataloging.Config against the real repository.
func TestDiscoverAllowedConfigStructs(t *testing.T) {
	repoRoot, err := RepoRoot()
	require.NoError(t, err)
	allowed, err := DiscoverAllowedConfigStructs(repoRoot)
	require.NoError(t, err)

	// verify we found multiple config types
	require.NotEmpty(t, allowed, "should discover at least one allowed config type")

	// verify specific config types that should be in pkgcataloging.Config
	for _, want := range []string{
		"golang.CatalogerConfig",
		"java.ArchiveCatalogerConfig",
		"python.CatalogerConfig",
		"dotnet.CatalogerConfig",
		"kernel.LinuxKernelCatalogerConfig",
		"javascript.CatalogerConfig",
	} {
		require.True(t, allowed[want], "should find %s in allowed configs", want)
	}

	// log all discovered configs for manual inspection
	t.Logf("Discovered %d allowed config types:", len(allowed))
	for configType := range allowed {
		t.Logf(" - %s", configType)
	}
}

View File

@ -1,3 +1,4 @@
// this file discovers generic catalogers from source code by walking syft/pkg/cataloger/ and using AST parsing to find generic.NewCataloger() calls and extract parser information.
package main package main
import ( import (

View File

@ -0,0 +1,389 @@
package main
import (
"go/ast"
"go/parser"
"go/token"
"testing"
"github.com/stretchr/testify/require"
)
// test helper functions

// parseFuncDecl parses a function declaration from a code string
func parseFuncDecl(t *testing.T, code string) *ast.FuncDecl {
	t.Helper()
	// wrap the snippet in a package clause so it parses as a complete file
	file, err := parser.ParseFile(token.NewFileSet(), "", "package test\n"+code, 0)
	require.NoError(t, err)
	require.Len(t, file.Decls, 1, "expected exactly one declaration")
	decl, isFunc := file.Decls[0].(*ast.FuncDecl)
	require.True(t, isFunc, "expected declaration to be a function")
	return decl
}
// parseCallExpr parses a call expression from a code string
func parseCallExpr(t *testing.T, code string) *ast.CallExpr {
	t.Helper()
	parsed, err := parser.ParseExpr(code)
	require.NoError(t, err)
	call, isCall := parsed.(*ast.CallExpr)
	require.True(t, isCall, "expected expression to be a call expression")
	return call
}
// parseCompositeLit parses a composite literal from a code string
func parseCompositeLit(t *testing.T, code string) *ast.CompositeLit {
	t.Helper()
	parsed, err := parser.ParseExpr(code)
	require.NoError(t, err)
	literal, isLit := parsed.(*ast.CompositeLit)
	require.True(t, isLit, "expected expression to be a composite literal")
	return literal
}
// parseConstDecl parses a const declaration from a code string and returns the GenDecl
func parseConstDecl(t *testing.T, code string) *ast.GenDecl {
	t.Helper()
	// wrap the snippet in a package clause so it parses as a complete file
	file, err := parser.ParseFile(token.NewFileSet(), "", "package test\n"+code, 0)
	require.NoError(t, err)
	require.Len(t, file.Decls, 1, "expected exactly one declaration")
	decl, isGen := file.Decls[0].(*ast.GenDecl)
	require.True(t, isGen, "expected declaration to be a general declaration")
	return decl
}
func TestReturnsPackageCataloger(t *testing.T) {
tests := []struct {
name string
code string
want bool
}{
{
name: "returns pkg.Cataloger",
code: `func NewFoo() pkg.Cataloger { return nil }`,
want: true,
},
{
name: "returns bare Cataloger",
code: `func NewFoo() Cataloger { return nil }`,
want: true,
},
{
name: "returns multiple values",
code: `func NewFoo() (pkg.Cataloger, error) { return nil, nil }`,
want: false,
},
{
name: "returns error",
code: `func NewFoo() error { return nil }`,
want: false,
},
{
name: "returns pointer to Cataloger",
code: `func NewFoo() *pkg.Cataloger { return nil }`,
want: false,
},
{
name: "returns string",
code: `func NewFoo() string { return "" }`,
want: false,
},
{
name: "no return type",
code: `func NewFoo() { }`,
want: false,
},
{
name: "returns wrong package Cataloger",
code: `func NewFoo() other.Cataloger { return nil }`,
want: false,
},
{
name: "returns pkg.OtherType",
code: `func NewFoo() pkg.OtherType { return nil }`,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
funcDecl := parseFuncDecl(t, tt.code)
got := returnsPackageCataloger(funcDecl)
require.Equal(t, tt.want, got)
})
}
}
func TestIsGenericNewCatalogerCall(t *testing.T) {
tests := []struct {
name string
code string
want bool
}{
{
name: "generic.NewCataloger call",
code: `generic.NewCataloger("foo")`,
want: true,
},
{
name: "generic.NewCataloger with no args",
code: `generic.NewCataloger()`,
want: true,
},
{
name: "other.NewCataloger call",
code: `other.NewCataloger("foo")`,
want: false,
},
{
name: "generic.OtherMethod call",
code: `generic.OtherMethod("foo")`,
want: false,
},
{
name: "bare NewCataloger call",
code: `NewCataloger("foo")`,
want: false,
},
{
name: "nested call",
code: `foo(generic.NewCataloger("bar"))`,
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
callExpr := parseCallExpr(t, tt.code)
got := isGenericNewCatalogerCall(callExpr)
require.Equal(t, tt.want, got)
})
}
}
func TestExtractStringSliceFromExpr(t *testing.T) {
tests := []struct {
name string
code string
want []string
}{
{
name: "strset.New with strings",
code: `strset.New([]string{"foo", "bar", "baz"})`,
want: []string{"foo", "bar", "baz"},
},
{
name: "strset.New with single string",
code: `strset.New([]string{"single"})`,
want: []string{"single"},
},
{
name: "strset.New with empty slice",
code: `strset.New([]string{})`,
want: nil,
},
{
name: "other.New with strings",
code: `other.New([]string{"x", "y"})`,
want: []string{"x", "y"},
},
{
name: "call with no args",
code: `strset.New()`,
want: nil,
},
{
name: "call with non-composite-literal arg",
code: `strset.New("not a slice")`,
want: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
callExpr := parseCallExpr(t, tt.code)
got := extractStringSliceFromExpr(callExpr)
require.Equal(t, tt.want, got)
})
}
}
func TestSearchConstInDecl(t *testing.T) {
tests := []struct {
name string
code string
constName string
want string
}{
{
name: "single const",
code: `const Foo = "bar"`,
constName: "Foo",
want: "bar",
},
{
name: "grouped consts - first",
code: `const (
Foo = "bar"
Baz = "qux"
)`,
constName: "Foo",
want: "bar",
},
{
name: "grouped consts - second",
code: `const (
Foo = "bar"
Baz = "qux"
)`,
constName: "Baz",
want: "qux",
},
{
name: "const not found",
code: `const Foo = "bar"`,
constName: "Missing",
want: "",
},
{
name: "var declaration instead of const",
code: `var Foo = "bar"`,
constName: "Foo",
want: "",
},
{
name: "const with non-string value",
code: `const Foo = 42`,
constName: "Foo",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
genDecl := parseConstDecl(t, tt.code)
got := searchConstInDecl(genDecl, tt.constName)
require.Equal(t, tt.want, got)
})
}
}
// TestGetConstValue verifies extraction of a named string constant's value from a single
// ValueSpec, including multi-name specs and rejection of non-string / complex values.
func TestGetConstValue(t *testing.T) {
	cases := []struct {
		name      string
		code      string
		constName string
		want      string
	}{
		{name: "single const match", code: `const Foo = "bar"`, constName: "Foo", want: "bar"},
		{name: "no match", code: `const Foo = "bar"`, constName: "NotFoo", want: ""},
		{name: "non-string literal", code: `const Foo = 123`, constName: "Foo", want: ""},
		{name: "const with complex value", code: `const Foo = Bar + "suffix"`, constName: "Foo", want: ""},
		{name: "first of multiple in same spec", code: `const Foo, Bar = "baz", "qux"`, constName: "Foo", want: "baz"},
		{name: "second of multiple in same spec", code: `const Foo, Bar = "baz", "qux"`, constName: "Bar", want: "qux"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			decl := parseConstDecl(t, tc.code)
			require.Equal(t, token.CONST, decl.Tok)
			require.NotEmpty(t, decl.Specs)
			// getConstValue operates on a single ValueSpec; in a grouped const each
			// constant is its own spec, so scan until a non-empty result appears
			var got string
			for _, spec := range decl.Specs {
				valueSpec, isValueSpec := spec.(*ast.ValueSpec)
				require.True(t, isValueSpec)
				if got = getConstValue(valueSpec, tc.constName); got != "" {
					break
				}
			}
			require.Equal(t, tc.want, got)
		})
	}
}
func TestResolveImportPath(t *testing.T) {
const testRepoRoot = "/repo/root"
tests := []struct {
name string
importPath string
want string
}{
{
name: "syft pkg cataloger golang",
importPath: "github.com/anchore/syft/syft/pkg/cataloger/golang",
want: "/repo/root/syft/pkg/cataloger/golang",
},
{
name: "syft internal capabilities",
importPath: "github.com/anchore/syft/internal/capabilities",
want: "/repo/root/internal/capabilities",
},
{
name: "syft root package",
importPath: "github.com/anchore/syft/syft",
want: "/repo/root/syft",
},
{
name: "external package",
importPath: "github.com/other/repo/pkg",
want: "",
},
{
name: "standard library",
importPath: "fmt",
want: "",
},
{
name: "empty import path",
importPath: "",
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := resolveImportPath(tt.importPath, testRepoRoot)
require.Equal(t, tt.want, got)
})
}
}

View File

@ -1,3 +1,4 @@
// this file discovers metadata and package types by reading test-observations.json files generated by pkgtest helpers during test execution.
package main package main
import ( import (

View File

@ -0,0 +1,320 @@
package main
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestApplyParserObservations verifies that parser-level test observations are merged
// onto the matching parsers of a discovered cataloger, and that the returned flag
// reflects whether any observation data was found for at least one parser.
func TestApplyParserObservations(t *testing.T) {
	tests := []struct {
		name             string
		cataloger        DiscoveredCataloger
		index            *TestObservationIndex
		wantFoundData    bool
		wantMetadataType string
		wantPackageType  string
	}{
		{
			name: "parser observations applied to matching parser",
			cataloger: DiscoveredCataloger{
				Name:        "test-cataloger",
				PackageName: "testpkg",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseTestFile"},
				},
			},
			// index is built via an immediately-invoked func so each case owns a fresh index
			index: func() *TestObservationIndex {
				idx := newTestObservationIndex()
				idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{
					MetadataTypes: []string{"pkg.TestMetadata"},
					PackageTypes:  []string{"test-type"},
				})
				return idx
			}(),
			wantFoundData:    true,
			wantMetadataType: "pkg.TestMetadata",
			wantPackageType:  "test-type",
		},
		{
			name: "no observations found for parser",
			cataloger: DiscoveredCataloger{
				Name:        "test-cataloger",
				PackageName: "testpkg",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseOtherFile"},
				},
			},
			index: func() *TestObservationIndex {
				idx := newTestObservationIndex()
				idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{
					MetadataTypes: []string{"pkg.TestMetadata"},
				})
				return idx
			}(),
			wantFoundData: false,
		},
		{
			name: "multiple parsers with mixed observations",
			cataloger: DiscoveredCataloger{
				Name:        "test-cataloger",
				PackageName: "testpkg",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseFirst"},
					{ParserFunction: "parseSecond"},
				},
			},
			index: func() *TestObservationIndex {
				idx := newTestObservationIndex()
				idx.setParserObservations("testpkg", "parseFirst", &TypeObservation{
					MetadataTypes: []string{"pkg.FirstMetadata"},
				})
				// parseSecond has no observations
				return idx
			}(),
			wantFoundData:    true,
			wantMetadataType: "pkg.FirstMetadata",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotFoundData := applyParserObservations(&tt.cataloger, tt.index)
			require.Equal(t, tt.wantFoundData, gotFoundData)
			// expectations below are only checked against the first parser in the table
			if tt.wantFoundData && tt.wantMetadataType != "" {
				require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, tt.wantMetadataType)
			}
			if tt.wantFoundData && tt.wantPackageType != "" {
				require.Contains(t, tt.cataloger.Parsers[0].PackageTypes, tt.wantPackageType)
			}
		})
	}
}
// TestApplySingleParserCatalogerObservations verifies that cataloger-level
// observations are applied to a cataloger with exactly one parser, merging
// with (not overwriting) any parser-level data already present, and that an
// empty observation reports no data found.
func TestApplySingleParserCatalogerObservations(t *testing.T) {
	tests := []struct {
		name             string
		cataloger        DiscoveredCataloger
		catalogerObs     *TypeObservation
		wantFoundData    bool
		wantMetadataType []string // full expected set on the single parser
		wantPackageType  []string // full expected set on the single parser
	}{
		{
			name: "cataloger-level observations applied to single parser",
			cataloger: DiscoveredCataloger{
				Name: "single-parser-cataloger",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseSingle"},
				},
			},
			catalogerObs: &TypeObservation{
				MetadataTypes: []string{"pkg.CatalogerMetadata"},
				PackageTypes:  []string{"cataloger-type"},
			},
			wantFoundData:    true,
			wantMetadataType: []string{"pkg.CatalogerMetadata"},
			wantPackageType:  []string{"cataloger-type"},
		},
		{
			// parser already has data; the cataloger-level data is unioned in
			name: "cataloger-level merges with existing parser-level observations",
			cataloger: DiscoveredCataloger{
				Name: "single-parser-cataloger",
				Parsers: []DiscoveredParser{
					{
						ParserFunction: "parseSingle",
						MetadataTypes:  []string{"pkg.ParserMetadata"},
						PackageTypes:   []string{"parser-type"},
					},
				},
			},
			catalogerObs: &TypeObservation{
				MetadataTypes: []string{"pkg.CatalogerMetadata"},
				PackageTypes:  []string{"cataloger-type"},
			},
			wantFoundData:    true,
			wantMetadataType: []string{"pkg.CatalogerMetadata", "pkg.ParserMetadata"},
			wantPackageType:  []string{"cataloger-type", "parser-type"},
		},
		{
			// an observation with no types contributes nothing
			name: "empty cataloger observations",
			cataloger: DiscoveredCataloger{
				Name: "single-parser-cataloger",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseSingle"},
				},
			},
			catalogerObs:  &TypeObservation{},
			wantFoundData: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotFoundData := applySingleParserCatalogerObservations(&tt.cataloger, tt.catalogerObs)
			require.Equal(t, tt.wantFoundData, gotFoundData)
			if tt.wantFoundData {
				// order is not part of the contract; compare as sets
				if len(tt.wantMetadataType) > 0 {
					require.ElementsMatch(t, tt.wantMetadataType, tt.cataloger.Parsers[0].MetadataTypes)
				}
				if len(tt.wantPackageType) > 0 {
					require.ElementsMatch(t, tt.wantPackageType, tt.cataloger.Parsers[0].PackageTypes)
				}
			}
		})
	}
}
// TestApplyMultiParserCatalogerObservations verifies the gap-filling behavior
// for catalogers with multiple parsers: cataloger-level observations are only
// applied to parsers that have no data of their own, and are not applied at
// all when every parser already has data.
func TestApplyMultiParserCatalogerObservations(t *testing.T) {
	tests := []struct {
		name         string
		cataloger    DiscoveredCataloger
		catalogerObs *TypeObservation
		wantFoundData bool
		// expectations for each parser by index
		wantParser0HasMetadata bool
		wantParser1HasMetadata bool
	}{
		{
			name: "all parsers without data - cataloger-level applied to all",
			cataloger: DiscoveredCataloger{
				Name: "multi-parser-cataloger",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseFirst"},
					{ParserFunction: "parseSecond"},
				},
			},
			catalogerObs: &TypeObservation{
				MetadataTypes: []string{"pkg.SharedMetadata"},
				PackageTypes:  []string{"shared-type"},
			},
			wantFoundData:          true,
			wantParser0HasMetadata: true,
			wantParser1HasMetadata: true,
		},
		{
			name: "some parsers have data - cataloger-level only fills gaps",
			cataloger: DiscoveredCataloger{
				Name: "multi-parser-cataloger",
				Parsers: []DiscoveredParser{
					{
						ParserFunction: "parseFirst",
						MetadataTypes:  []string{"pkg.FirstMetadata"},
					},
					{ParserFunction: "parseSecond"}, // no data
				},
			},
			catalogerObs: &TypeObservation{
				MetadataTypes: []string{"pkg.SharedMetadata"},
			},
			wantFoundData:          true,
			wantParser0HasMetadata: false, // already has data, not overwritten
			wantParser1HasMetadata: true,  // gets cataloger-level data
		},
		{
			name: "all parsers have data - cataloger-level not applied",
			cataloger: DiscoveredCataloger{
				Name: "multi-parser-cataloger",
				Parsers: []DiscoveredParser{
					{
						ParserFunction: "parseFirst",
						MetadataTypes:  []string{"pkg.FirstMetadata"},
					},
					{
						ParserFunction: "parseSecond",
						MetadataTypes:  []string{"pkg.SecondMetadata"},
					},
				},
			},
			catalogerObs: &TypeObservation{
				MetadataTypes: []string{"pkg.SharedMetadata"},
			},
			wantFoundData:          false,
			wantParser0HasMetadata: false, // should not have shared metadata
			wantParser1HasMetadata: false, // should not have shared metadata
		},
		{
			name: "empty cataloger observations",
			cataloger: DiscoveredCataloger{
				Name: "multi-parser-cataloger",
				Parsers: []DiscoveredParser{
					{ParserFunction: "parseFirst"},
					{ParserFunction: "parseSecond"},
				},
			},
			catalogerObs:           &TypeObservation{},
			wantFoundData:          false,
			wantParser0HasMetadata: false,
			wantParser1HasMetadata: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotFoundData := applyMultiParserCatalogerObservations(&tt.cataloger, tt.catalogerObs)
			require.Equal(t, tt.wantFoundData, gotFoundData)
			// check presence/absence of the shared metadata on each parser;
			// the negative check only applies when the observation had data to give
			if tt.wantParser0HasMetadata {
				require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata",
					"parser 0 should have shared metadata")
			} else if len(tt.catalogerObs.MetadataTypes) > 0 {
				// if cataloger has metadata but we don't expect it in parser 0, verify it's not there
				require.NotContains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata",
					"parser 0 should not have shared metadata")
			}
			if tt.wantParser1HasMetadata {
				require.Contains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata",
					"parser 1 should have shared metadata")
			} else if len(tt.catalogerObs.MetadataTypes) > 0 {
				// if cataloger has metadata but we don't expect it in parser 1, verify it's not there
				require.NotContains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata",
					"parser 1 should not have shared metadata")
			}
		})
	}
}
// TestMergeAndDeduplicateStrings verifies that merging two string slices
// yields the union of both without duplicate entries, including the
// degenerate empty-input cases.
func TestMergeAndDeduplicateStrings(t *testing.T) {
	cases := []struct {
		name       string
		existing   []string
		additional []string
		want       []string
	}{
		{name: "merge with duplicates", existing: []string{"a", "b"}, additional: []string{"b", "c"}, want: []string{"a", "b", "c"}},
		{name: "empty existing", existing: []string{}, additional: []string{"a", "b"}, want: []string{"a", "b"}},
		{name: "empty additional", existing: []string{"a", "b"}, additional: []string{}, want: []string{"a", "b"}},
		{name: "both empty", existing: []string{}, additional: []string{}, want: []string{}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// order is not part of the contract, so compare as sets
			merged := mergeAndDeduplicateStrings(tc.existing, tc.additional)
			require.ElementsMatch(t, tc.want, merged)
		})
	}
}

View File

@ -1,3 +1,4 @@
// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration.
package main package main
import ( import (

View File

@ -0,0 +1,553 @@
package main
import (
"testing"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)
// TestFindSectionNode verifies lookup of a top-level section's value node in
// a YAML mapping: present sections are found (including scalar-valued ones),
// and absent sections or empty mappings yield nil.
func TestFindSectionNode(t *testing.T) {
	tests := []struct {
		name        string
		yamlContent string
		sectionName string
		wantFound   bool
		wantValue   string // expected value for scalar nodes
	}{
		{
			name: "finds existing configs section",
			yamlContent: `
configs:
  key: value
catalogers:
  - name: test
`,
			sectionName: "configs",
			wantFound:   true,
		},
		{
			name: "finds existing catalogers section",
			yamlContent: `
configs:
  key: value
catalogers:
  - name: test
`,
			sectionName: "catalogers",
			wantFound:   true,
		},
		{
			name: "returns nil for non-existent section",
			yamlContent: `
configs:
  key: value
`,
			sectionName: "nonexistent",
			wantFound:   false,
		},
		{
			name:        "handles empty mapping",
			yamlContent: `{}`,
			sectionName: "any",
			wantFound:   false,
		},
		{
			name: "finds section with scalar value",
			yamlContent: `
name: test-cataloger
type: custom
`,
			sectionName: "name",
			wantFound:   true,
			wantValue:   "test-cataloger",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var rootNode yaml.Node
			err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode)
			require.NoError(t, err)
			// get the mapping node (unwrap the document wrapper when present)
			var mappingNode *yaml.Node
			if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
				mappingNode = rootNode.Content[0]
			} else {
				mappingNode = &rootNode
			}
			got := findSectionNode(mappingNode, tt.sectionName)
			if tt.wantFound {
				require.NotNil(t, got)
				if tt.wantValue != "" {
					require.Equal(t, tt.wantValue, got.Value)
				}
			} else {
				require.Nil(t, got)
			}
		})
	}
}
// TestFindFieldValue verifies scalar field lookup in a YAML mapping: existing
// fields return their string value, while missing fields and empty mappings
// return the empty string.
func TestFindFieldValue(t *testing.T) {
	tests := []struct {
		name        string
		yamlContent string
		fieldName   string
		want        string
	}{
		{
			name: "finds simple string field",
			yamlContent: `
name: test-cataloger
type: custom
`,
			fieldName: "name",
			want:      "test-cataloger",
		},
		{
			name: "finds type field",
			yamlContent: `
name: test-cataloger
type: generic
`,
			fieldName: "type",
			want:      "generic",
		},
		{
			name: "returns empty for non-existent field",
			yamlContent: `
name: test-cataloger
`,
			fieldName: "nonexistent",
			want:      "",
		},
		{
			name: "finds parser_function field",
			yamlContent: `
parser_function: parseGoMod
metadata_types: [GoModMetadata]
`,
			fieldName: "parser_function",
			want:      "parseGoMod",
		},
		{
			name:        "handles empty mapping",
			yamlContent: `{}`,
			fieldName:   "any",
			want:        "",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var rootNode yaml.Node
			err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode)
			require.NoError(t, err)
			// get the mapping node (unwrap the document wrapper when present)
			var mappingNode *yaml.Node
			if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
				mappingNode = rootNode.Content[0]
			} else {
				mappingNode = &rootNode
			}
			got := findFieldValue(mappingNode, tt.fieldName)
			require.Equal(t, tt.want, got)
		})
	}
}
// TestPreserveMappingNodeComments verifies that head and line comments from
// an existing mapping's key nodes (and head comments on value nodes) are
// carried over onto a freshly generated mapping with the same keys.
func TestPreserveMappingNodeComments(t *testing.T) {
	tests := []struct {
		name            string
		checkField      string
		wantHeadComment string
		wantLineComment string
	}{
		{
			name:            "preserves line comment on field",
			checkField:      "name",
			wantLineComment: "AUTO-GENERATED",
		},
		{
			name:            "preserves head comment on field",
			checkField:      "type",
			wantHeadComment: "Important field",
			wantLineComment: "AUTO-GENERATED",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// manually construct nodes with comments; mapping content is stored
			// as alternating key/value entries
			existingMapping := &yaml.Node{
				Kind: yaml.MappingNode,
				Content: []*yaml.Node{
					{Kind: yaml.ScalarNode, Value: "name", LineComment: "AUTO-GENERATED"},
					{Kind: yaml.ScalarNode, Value: "test", HeadComment: "value comment"},
					{Kind: yaml.ScalarNode, Value: "type", HeadComment: "Important field", LineComment: "AUTO-GENERATED"},
					{Kind: yaml.ScalarNode, Value: "custom"},
				},
			}
			// the regenerated mapping starts with no comments at all
			newMapping := &yaml.Node{
				Kind: yaml.MappingNode,
				Content: []*yaml.Node{
					{Kind: yaml.ScalarNode, Value: "name"},
					{Kind: yaml.ScalarNode, Value: "test-new"},
					{Kind: yaml.ScalarNode, Value: "type"},
					{Kind: yaml.ScalarNode, Value: "generic"},
				},
			}
			preserveMappingNodeComments(existingMapping, newMapping)
			// find the field we're checking
			keyNode, valueNode := findFieldNodes(newMapping, tt.checkField)
			require.NotNil(t, keyNode, "field %s not found", tt.checkField)
			// check comments were preserved
			if tt.wantHeadComment != "" {
				require.Equal(t, tt.wantHeadComment, keyNode.HeadComment)
			}
			if tt.wantLineComment != "" {
				require.Equal(t, tt.wantLineComment, keyNode.LineComment)
			}
			// verify that value node comments are also preserved
			if tt.checkField == "name" {
				require.Equal(t, "value comment", valueNode.HeadComment)
			}
		})
	}
}
// TestPreserveSequenceNodeComments exercises comment preservation across
// sequence nodes (parser lists matched by parser_function).
//
// NOTE(review): this test only guards against panics — the final assertion
// checks a pointer that is set before the call and can never be nil, and the
// promised comment verification is never performed. TODO: assert that the
// "# old parser" comment actually survives onto the regenerated sequence.
func TestPreserveSequenceNodeComments(t *testing.T) {
	tests := []struct {
		name            string
		existingYAML    string
		newYAML         string
		wantHeadComment string
	}{
		{
			name: "preserves parser comments by parser_function",
			existingYAML: `
- parser_function: parseGoMod # old parser
  metadata_types: [GoModMetadata]
- parser_function: parseGoSum
  metadata_types: [GoSumMetadata]
`,
			newYAML: `
- parser_function: parseGoMod
  metadata_types: [GoModMetadataNew]
- parser_function: parseGoSum
  metadata_types: [GoSumMetadataNew]
`,
			// we'll verify in the test body that comments are preserved
		},
		{
			name: "handles new parsers not in existing",
			existingYAML: `
- parser_function: parseGoMod
  metadata_types: [GoModMetadata]
`,
			newYAML: `
- parser_function: parseGoMod
  metadata_types: [GoModMetadata]
- parser_function: parseGoSum
  metadata_types: [GoSumMetadata]
`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var existingNode, newNode yaml.Node
			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
			require.NoError(t, err)
			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
			require.NoError(t, err)
			// get sequence nodes (unwrap the document wrappers)
			existingSeq := getSequenceNode(&existingNode)
			newSeq := getSequenceNode(&newNode)
			preserveSequenceNodeComments(existingSeq, newSeq)
			// verify that the function ran without panicking
			require.NotNil(t, newSeq)
		})
	}
}
// TestPreserveFieldComments exercises preserveFieldComments with matching
// node kinds (mapping vs. mapping) and with mismatched kinds (sequence vs.
// mapping), ensuring it completes without panicking in both situations.
//
// The previous version carried a wantPreserve column that was never read by
// the test body, falsely suggesting that comment preservation itself was
// asserted here; that dead field has been removed.
func TestPreserveFieldComments(t *testing.T) {
	tests := []struct {
		name         string
		existingYAML string
		newYAML      string
	}{
		{
			name: "preserves mapping node comments",
			existingYAML: `
name: test # AUTO-GENERATED
type: custom
`,
			newYAML: `
name: test-new
type: custom
`,
		},
		{
			// existing is a sequence while new is a mapping; the function must
			// tolerate the kind mismatch rather than panic
			name: "handles kind mismatch gracefully",
			existingYAML: `
- item1
- item2
`,
			newYAML: `
name: test
`,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var existingNode, newNode yaml.Node
			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
			require.NoError(t, err)
			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
			require.NoError(t, err)
			existingContent := getContentNode(&existingNode)
			newContent := getContentNode(&newNode)
			preserveFieldComments(existingContent, newContent)
			// verify the function completed without panicking
			require.NotNil(t, newContent)
		})
	}
}
// TestUpdateOrAddSection verifies that updateOrAddSection either replaces an
// existing top-level section or appends the section when absent; in both
// cases the section must be findable in the resulting mapping.
//
// The previous version carried wantUpdated/wantAdded columns that were never
// read by the test body; those dead fields have been removed (the case names
// still record which path each case exercises).
func TestUpdateOrAddSection(t *testing.T) {
	tests := []struct {
		name         string
		existingYAML string
		newYAML      string
		sectionName  string
	}{
		{
			name: "updates existing section",
			existingYAML: `
configs:
  old: value
catalogers:
  - name: test
`,
			newYAML: `
configs:
  new: value
`,
			sectionName: "configs",
		},
		{
			name: "adds new section",
			existingYAML: `
catalogers:
  - name: test
`,
			newYAML: `
configs:
  new: value
`,
			sectionName: "configs",
		},
		{
			name: "handles application section",
			existingYAML: `
catalogers:
  - name: test
`,
			newYAML: `
application:
  key: value
`,
			sectionName: "application",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var existingNode, newNode yaml.Node
			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
			require.NoError(t, err)
			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
			require.NoError(t, err)
			existingMapping := getMappingNode(&existingNode)
			newMapping := getMappingNode(&newNode)
			updateOrAddSection(existingMapping, newMapping, tt.sectionName)
			// verify the section exists in the result
			resultSection := findSectionNode(existingMapping, tt.sectionName)
			require.NotNil(t, resultSection, "section %s should exist after update", tt.sectionName)
		})
	}
}
// TestAddCatalogerFieldComment verifies the line comment stamped on each
// top-level cataloger field of the generated YAML: most fields are marked
// AUTO-GENERATED, while hand-maintained fields (ecosystem, capabilities, and
// detectors for any cataloger other than binary-classifier-cataloger) are
// marked MANUAL.
func TestAddCatalogerFieldComment(t *testing.T) {
	tests := []struct {
		name            string
		fieldName       string
		fieldValue      string // NOTE(review): never set by any case below, so valueNode is always an empty scalar
		catalogerName   string
		wantLineComment string
	}{
		{
			name:            "ecosystem is MANUAL",
			fieldName:       "ecosystem",
			catalogerName:   "test-cataloger",
			wantLineComment: "MANUAL",
		},
		{
			name:            "name is AUTO-GENERATED",
			fieldName:       "name",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "type is AUTO-GENERATED",
			fieldName:       "type",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "source is AUTO-GENERATED",
			fieldName:       "source",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "config is AUTO-GENERATED",
			fieldName:       "config",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "selectors is AUTO-GENERATED",
			fieldName:       "selectors",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "parsers is AUTO-GENERATED structure",
			fieldName:       "parsers",
			catalogerName:   "test-cataloger",
			wantLineComment: "AUTO-GENERATED structure",
		},
		{
			// binary-classifier-cataloger is the one cataloger whose detectors
			// are machine-generated rather than hand-maintained
			name:            "detectors for binary-classifier-cataloger is AUTO-GENERATED",
			fieldName:       "detectors",
			catalogerName:   "binary-classifier-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "detectors for other catalogers is MANUAL",
			fieldName:       "detectors",
			catalogerName:   "java-archive-cataloger",
			wantLineComment: "MANUAL - edit detectors here",
		},
		{
			name:            "metadata_types is AUTO-GENERATED",
			fieldName:       "metadata_types",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "package_types is AUTO-GENERATED",
			fieldName:       "package_types",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "json_schema_types is AUTO-GENERATED",
			fieldName:       "json_schema_types",
			catalogerName:   "test-cataloger",
			wantLineComment: autoGeneratedComment,
		},
		{
			name:            "capabilities is MANUAL",
			fieldName:       "capabilities",
			catalogerName:   "test-cataloger",
			wantLineComment: "MANUAL - edit capabilities here",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// create key and value nodes; only the key node's comment is asserted
			keyNode := &yaml.Node{
				Kind:  yaml.ScalarNode,
				Value: tt.fieldName,
			}
			valueNode := &yaml.Node{
				Kind:  yaml.ScalarNode,
				Value: tt.fieldValue,
			}
			addCatalogerFieldComment(keyNode, valueNode, tt.catalogerName)
			require.Equal(t, tt.wantLineComment, keyNode.LineComment)
		})
	}
}
// helper functions

// getMappingNode unwraps a YAML document node to its underlying mapping;
// any other node (or an empty document) is returned unchanged.
func getMappingNode(node *yaml.Node) *yaml.Node {
	if node.Kind != yaml.DocumentNode || len(node.Content) == 0 {
		return node
	}
	return node.Content[0]
}
// getSequenceNode unwraps a YAML document node to its underlying sequence;
// any other node (or an empty document) is returned unchanged.
func getSequenceNode(node *yaml.Node) *yaml.Node {
	if node.Kind == yaml.DocumentNode {
		if content := node.Content; len(content) > 0 {
			return content[0]
		}
	}
	return node
}
// getContentNode unwraps a YAML document node to its first content node;
// any other node (or an empty document) is returned unchanged.
func getContentNode(node *yaml.Node) *yaml.Node {
	isWrappedDocument := node.Kind == yaml.DocumentNode && len(node.Content) > 0
	if !isWrappedDocument {
		return node
	}
	return node.Content[0]
}
// findFieldNodes returns the key and value nodes for fieldName within a YAML
// mapping node, or (nil, nil) when the field is absent or the node is not a
// mapping. Mapping content is stored as alternating key/value entries.
func findFieldNodes(mappingNode *yaml.Node, fieldName string) (*yaml.Node, *yaml.Node) {
	if mappingNode.Kind != yaml.MappingNode {
		return nil, nil
	}
	// i+1 < len guards against an index-out-of-range panic on a malformed
	// mapping with a trailing key and no value
	for i := 0; i+1 < len(mappingNode.Content); i += 2 {
		if mappingNode.Content[i].Value == fieldName {
			return mappingNode.Content[i], mappingNode.Content[i+1]
		}
	}
	return nil, nil
}

View File

@ -1,3 +1,4 @@
// this is the entry point for regenerating the packages.yaml file, which orchestrates discovery, merging, and validation of cataloger capabilities.
package main package main
import ( import (
@ -19,25 +20,6 @@ var (
dimStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("245")) // lighter grey (256-color) dimStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("245")) // lighter grey (256-color)
) )
func printSuccessASCII() {
fmt.Println()
fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉")
fmt.Println()
fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀"))
fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█"))
fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀"))
fmt.Println()
}
func printFailureASCII() {
fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢")
fmt.Println()
fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄"))
fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█"))
fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░"))
fmt.Println()
}
func main() { func main() {
repoRoot, err := RepoRoot() repoRoot, err := RepoRoot()
if err != nil { if err != nil {
@ -147,3 +129,22 @@ func hasEmptyCapabilities(caps capabilities.CapabilitySet) bool {
// if someone filled out the capabilities section (even with all false/empty values), that's intentional // if someone filled out the capabilities section (even with all false/empty values), that's intentional
return len(caps) == 0 return len(caps) == 0
} }
// printSuccessASCII prints the celebratory banner shown when all validations
// pass, styled with successStyle.
func printSuccessASCII() {
	fmt.Println()
	fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉")
	fmt.Println()
	fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀"))
	fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█"))
	fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀"))
	fmt.Println()
}
// printFailureASCII prints the banner shown when validation fails, styled
// with errorStyle.
func printFailureASCII() {
	fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢")
	fmt.Println()
	fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄"))
	fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█"))
	fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░"))
	fmt.Println()
}

View File

@ -1,3 +1,4 @@
// this file contains the core merging logic that combines discovered cataloger data with existing packages.yaml, preserving all manual sections while updating auto-generated fields.
package main package main
import ( import (
@ -49,6 +50,74 @@ var catalogerConfigOverrides = map[string]string{
"nix-store-cataloger": "nix.Config", "nix-store-cataloger": "nix.Config",
} }
// ecosystemMapping associates substring patterns found in cataloger names
// with the ecosystem name that should be reported for them.
type ecosystemMapping struct {
	patterns  []string // substrings searched for in the lowercased cataloger name
	ecosystem string   // ecosystem reported when any pattern matches
}

// ecosystemMappings is consulted in order, so more specific patterns must
// appear before more general ones (e.g. "javascript" before "java").
var ecosystemMappings = []ecosystemMapping{
	// language-based ecosystems
	{patterns: []string{"rust", "cargo"}, ecosystem: "rust"},
	{patterns: []string{"javascript", "node", "npm"}, ecosystem: "javascript"},
	{patterns: []string{"python"}, ecosystem: "python"},
	{patterns: []string{"java", "graalvm"}, ecosystem: "java"},
	{patterns: []string{"go-module", "golang"}, ecosystem: "go"},
	{patterns: []string{"ruby", "gem"}, ecosystem: "ruby"},
	{patterns: []string{"php", "composer", "pear", "pecl"}, ecosystem: "php"},
	{patterns: []string{"dotnet", ".net", "csharp"}, ecosystem: "dotnet"},
	{patterns: []string{"swift", "cocoapods"}, ecosystem: "swift"},
	{patterns: []string{"dart", "pubspec"}, ecosystem: "dart"},
	{patterns: []string{"elixir", "mix"}, ecosystem: "elixir"},
	{patterns: []string{"erlang", "rebar"}, ecosystem: "erlang"},
	{patterns: []string{"haskell", "cabal", "stack"}, ecosystem: "haskell"},
	{patterns: []string{"lua"}, ecosystem: "lua"},
	{patterns: []string{"ocaml", "opam"}, ecosystem: "ocaml"},
	{patterns: []string{"r-package"}, ecosystem: "r"},
	{patterns: []string{"swipl", "prolog"}, ecosystem: "prolog"},
	{patterns: []string{"cpp", "conan"}, ecosystem: "c++"},
	{patterns: []string{"kotlin"}, ecosystem: "kotlin"},
	// os/distro-based ecosystems
	{patterns: []string{"apk", "alpine"}, ecosystem: "alpine"},
	{patterns: []string{"dpkg", "deb", "debian"}, ecosystem: "debian"},
	{patterns: []string{"rpm", "redhat"}, ecosystem: "rpm"},
	{patterns: []string{"alpm", "arch"}, ecosystem: "arch"},
	{patterns: []string{"portage", "gentoo"}, ecosystem: "gentoo"},
	{patterns: []string{"homebrew"}, ecosystem: "homebrew"},
	{patterns: []string{"snap"}, ecosystem: "snap"},
	// other ecosystems
	{patterns: []string{"binary", "elf", "pe-binary"}, ecosystem: "binary"},
	{patterns: []string{"conda"}, ecosystem: "conda"},
	{patterns: []string{"nix"}, ecosystem: "nix"},
	{patterns: []string{"kernel"}, ecosystem: "linux"},
	{patterns: []string{"bitnami"}, ecosystem: "bitnami"},
	{patterns: []string{"terraform"}, ecosystem: "terraform"},
	{patterns: []string{"github"}, ecosystem: "github-actions"},
	{patterns: []string{"wordpress"}, ecosystem: "wordpress"},
	{patterns: []string{"sbom"}, ecosystem: "sbom"},
}

// inferEcosystem derives an ecosystem name from a cataloger name by scanning
// ecosystemMappings in declaration order, falling back to "other" when no
// known pattern matches. Matching is case-insensitive.
func inferEcosystem(catalogerName string) string {
	lowered := strings.ToLower(catalogerName)
	anyPatternMatches := func(patterns []string) bool {
		for _, pattern := range patterns {
			if strings.Contains(lowered, pattern) {
				return true
			}
		}
		return false
	}
	for _, mapping := range ecosystemMappings {
		if anyPatternMatches(mapping.patterns) {
			return mapping.ecosystem
		}
	}
	return "other"
}
// Statistics contains information about the regeneration process // Statistics contains information about the regeneration process
type Statistics struct { type Statistics struct {
TotalGenericCatalogers int TotalGenericCatalogers int
@ -813,71 +882,3 @@ func formatOrphans(orphans []orphanInfo) string {
} }
return strings.Join(lines, "\n") return strings.Join(lines, "\n")
} }
// ecosystemMapping maps patterns in cataloger names to ecosystem names.
// order matters - more specific patterns should come first.
type ecosystemMapping struct {
patterns []string // patterns to match in the cataloger name
ecosystem string // ecosystem to return if any pattern matches
}
// ecosystemMappings defines the pattern-to-ecosystem mappings.
// note: order matters - check more specific patterns first
var ecosystemMappings = []ecosystemMapping{
// language-based ecosystems
{[]string{"rust", "cargo"}, "rust"},
{[]string{"javascript", "node", "npm"}, "javascript"},
{[]string{"python"}, "python"},
{[]string{"java", "graalvm"}, "java"},
{[]string{"go-module", "golang"}, "go"},
{[]string{"ruby", "gem"}, "ruby"},
{[]string{"php", "composer", "pear", "pecl"}, "php"},
{[]string{"dotnet", ".net", "csharp"}, "dotnet"},
{[]string{"swift", "cocoapods"}, "swift"},
{[]string{"dart", "pubspec"}, "dart"},
{[]string{"elixir", "mix"}, "elixir"},
{[]string{"erlang", "rebar"}, "erlang"},
{[]string{"haskell", "cabal", "stack"}, "haskell"},
{[]string{"lua"}, "lua"},
{[]string{"ocaml", "opam"}, "ocaml"},
{[]string{"r-package"}, "r"},
{[]string{"swipl", "prolog"}, "prolog"},
{[]string{"cpp", "conan"}, "c++"},
{[]string{"kotlin"}, "kotlin"},
// os/distro-based ecosystems
{[]string{"apk", "alpine"}, "alpine"},
{[]string{"dpkg", "deb", "debian"}, "debian"},
{[]string{"rpm", "redhat"}, "rpm"},
{[]string{"alpm", "arch"}, "arch"},
{[]string{"portage", "gentoo"}, "gentoo"},
{[]string{"homebrew"}, "homebrew"},
{[]string{"snap"}, "snap"},
// other ecosystems
{[]string{"binary", "elf", "pe-binary"}, "binary"},
{[]string{"conda"}, "conda"},
{[]string{"nix"}, "nix"},
{[]string{"kernel"}, "linux"},
{[]string{"bitnami"}, "bitnami"},
{[]string{"terraform"}, "terraform"},
{[]string{"github"}, "github-actions"},
{[]string{"wordpress"}, "wordpress"},
{[]string{"sbom"}, "sbom"},
}
// inferEcosystem attempts to determine the ecosystem from a cataloger name
func inferEcosystem(catalogerName string) string {
name := strings.ToLower(catalogerName)
for _, mapping := range ecosystemMappings {
for _, pattern := range mapping.patterns {
if strings.Contains(name, pattern) {
return mapping.ecosystem
}
}
}
// default
return "other"
}

View File

@ -374,3 +374,153 @@ func TestCatalogerConfigFieldUpdatedForNewCatalogers(t *testing.T) {
}) })
} }
} }
// TestStripPURLVersion verifies that the version suffix (everything after
// the final "@") is removed from a package URL string, and that strings
// without a version — including the empty string — pass through unchanged.
func TestStripPURLVersion(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  string
	}{
		{name: "purl with version", input: "pkg:generic/python@1.0.0", want: "pkg:generic/python"},
		{name: "purl without version", input: "pkg:generic/python", want: "pkg:generic/python"},
		{name: "purl with multiple @ signs", input: "pkg:generic/py@thon@1.0.0", want: "pkg:generic/py@thon"},
		{name: "empty string", input: "", want: ""},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			stripped := stripPURLVersion(tc.input)
			require.Equal(t, tc.want, stripped)
		})
	}
}
// TestInferEcosystem verifies ecosystem inference against representative real
// cataloger names spanning language, OS/distro, and "other" mappings,
// including the "other" fallback for unrecognized names.
func TestInferEcosystem(t *testing.T) {
	tests := []struct {
		name          string
		catalogerName string
		want          string
	}{
		{
			name:          "go module cataloger",
			catalogerName: "go-module-binary-cataloger",
			want:          "go",
		},
		{
			name:          "python cataloger",
			catalogerName: "python-package-cataloger",
			want:          "python",
		},
		{
			name:          "java archive cataloger",
			catalogerName: "java-archive-cataloger",
			want:          "java",
		},
		{
			name:          "rust cargo cataloger",
			catalogerName: "rust-cargo-lock-cataloger",
			want:          "rust",
		},
		{
			// also exercises mapping order: "javascript" must win over "java"
			name:          "javascript npm cataloger",
			catalogerName: "javascript-package-cataloger",
			want:          "javascript",
		},
		{
			name:          "ruby gem cataloger",
			catalogerName: "ruby-gemspec-cataloger",
			want:          "ruby",
		},
		{
			name:          "debian dpkg cataloger",
			catalogerName: "dpkg-db-cataloger",
			want:          "debian",
		},
		{
			name:          "alpine apk cataloger",
			catalogerName: "apk-db-cataloger",
			want:          "alpine",
		},
		{
			name:          "linux kernel cataloger",
			catalogerName: "linux-kernel-cataloger",
			want:          "linux",
		},
		{
			name:          "binary classifier cataloger",
			catalogerName: "binary-classifier-cataloger",
			want:          "binary",
		},
		{
			name:          "github actions cataloger",
			catalogerName: "github-actions-usage-cataloger",
			want:          "github-actions",
		},
		{
			name:          "unknown cataloger defaults to other",
			catalogerName: "unknown-custom-cataloger",
			want:          "other",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := inferEcosystem(tt.catalogerName)
			require.Equal(t, tt.want, got)
		})
	}
}
// TestConvertToJSONSchemaTypesFromMetadata verifies conversion of pkg.*
// metadata type names into their JSON schema type names (e.g. "pkg.ApkDBEntry"
// -> "ApkDbEntry"), and that nil or empty input yields nil.
func TestConvertToJSONSchemaTypesFromMetadata(t *testing.T) {
	tests := []struct {
		name          string
		metadataTypes []string
		want          []string
	}{
		{
			name:          "empty slice returns nil",
			metadataTypes: []string{},
			want:          nil,
		},
		{
			name:          "nil slice returns nil",
			metadataTypes: nil,
			want:          nil,
		},
		{
			name:          "single metadata type",
			metadataTypes: []string{"pkg.AlpmDBEntry"},
			want:          []string{"AlpmDbEntry"},
		},
		{
			name:          "multiple metadata types",
			metadataTypes: []string{"pkg.ApkDBEntry", "pkg.BinarySignature"},
			want:          []string{"ApkDbEntry", "BinarySignature"},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := convertToJSONSchemaTypesFromMetadata(tt.metadataTypes)
			// cmp.Diff distinguishes nil from empty slices, pinning the nil contract
			if diff := cmp.Diff(tt.want, got); diff != "" {
				t.Errorf("convertToJSONSchemaTypesFromMetadata() mismatch (-want +got):\n%s", diff)
			}
		})
	}
}

View File

@ -1,3 +1,4 @@
// this file validates that all known metadata and package types are documented in packages.yaml by checking coverage and reporting any missing types.
package main package main
import ( import (

View File

@ -0,0 +1,21 @@
// Package python is a minimal cataloger stub: a generic cataloger whose
// constructor accepts a package-local config struct.
package python

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// catalogerName is the name this stub registers with the generic cataloger.
const catalogerName = "python-package-cataloger"

// CatalogerConfig is the configuration type accepted by NewPythonCataloger.
type CatalogerConfig struct {
	Setting string
}

// NewPythonCataloger returns a generic cataloger wired to parse; cfg is not
// read here — presumably only the constructor signature matters to consumers
// of this stub (TODO confirm).
func NewPythonCataloger(cfg CatalogerConfig) pkg.Cataloger {
	return generic.NewCataloger(catalogerName).
		WithParserByGlobs(parse, "**/*.py")
}

// parse is a stub parser that always returns no packages or relationships.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,19 @@
// Package duplicate declares two constructors (in two files) that both
// register the same cataloger name, "duplicate-cataloger".
package duplicate

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// Config1 is the configuration type accepted by NewDuplicateCataloger1.
type Config1 struct {
	Option1 bool
}

// NewDuplicateCataloger1 registers "duplicate-cataloger" — the same name as
// NewDuplicateCataloger2 in the sibling file; cfg is not read here.
func NewDuplicateCataloger1(cfg Config1) pkg.Cataloger {
	return generic.NewCataloger("duplicate-cataloger").
		WithParserByGlobs(parse1, "**/*.txt")
}

// parse1 is a stub parser that always returns no packages or relationships.
func parse1(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,19 @@
package duplicate

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// Config2 is the configuration type accepted by NewDuplicateCataloger2.
type Config2 struct {
	Option2 string
}

// NewDuplicateCataloger2 registers "duplicate-cataloger" — the same name as
// NewDuplicateCataloger1 in the sibling file; cfg is not read here.
func NewDuplicateCataloger2(cfg Config2) pkg.Cataloger {
	return generic.NewCataloger("duplicate-cataloger").
		WithParserByGlobs(parse2, "**/*.json")
}

// parse2 is a stub parser that always returns no packages or relationships.
func parse2(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,9 @@
// Package dotnet is a minimal custom (non-generic) cataloger stub; the
// constructor lives here while CatalogerConfig and dotnetCataloger are
// declared in a sibling file.
package dotnet

import (
	"github.com/anchore/syft/syft/pkg"
)

// NewDotnetCataloger builds a dotnetCataloger carrying the provided config.
func NewDotnetCataloger(cfg CatalogerConfig) pkg.Cataloger {
	return dotnetCataloger{cfg: cfg}
}

View File

@ -0,0 +1,23 @@
package dotnet

import (
	"github.com/anchore/syft/syft/pkg"
)

// catalogerName is the name reported by dotnetCataloger.Name.
const catalogerName = "dotnet-cataloger"

// CatalogerConfig is the configuration accepted by NewDotnetCataloger (in
// the sibling file).
type CatalogerConfig struct {
	Option bool
}

// dotnetCataloger is a custom cataloger implementation stub (not built on
// the generic cataloger).
type dotnetCataloger struct {
	cfg CatalogerConfig
}

// Name returns the cataloger's registered name.
func (d dotnetCataloger) Name() string {
	return catalogerName
}

// Catalog is a stub that always returns no packages or relationships.
func (d dotnetCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,27 @@
// Package java is a minimal custom cataloger stub whose constructor and
// implementation share one file, with a config type named differently from
// the usual CatalogerConfig convention.
package java

import (
	"github.com/anchore/syft/syft/pkg"
)

// pomCatalogerName is the name reported by pomXMLCataloger.Name.
const pomCatalogerName = "java-pom-cataloger"

// ArchiveCatalogerConfig is the configuration accepted by NewPomCataloger.
type ArchiveCatalogerConfig struct {
	IncludeArchives bool
}

// pomXMLCataloger is a custom cataloger implementation stub.
type pomXMLCataloger struct {
	cfg ArchiveCatalogerConfig
}

// Name returns the cataloger's registered name.
func (p pomXMLCataloger) Name() string {
	return pomCatalogerName
}

// Catalog is a stub that always returns no packages or relationships.
func (p pomXMLCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

// NewPomCataloger builds a pomXMLCataloger carrying the provided config.
func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
	return pomXMLCataloger{cfg: cfg}
}

View File

@ -0,0 +1,15 @@
// Package kernel is a minimal generic cataloger stub whose config type
// (LinuxKernelCatalogerConfig) is declared in a separate file.
package kernel

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// NewLinuxKernelCataloger returns a generic cataloger wired to parse; cfg is
// not read here.
func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) pkg.Cataloger {
	return generic.NewCataloger("linux-kernel-cataloger").
		WithParserByGlobs(parse, "**/vmlinuz")
}

// parse is a stub parser that always returns no packages or relationships.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,5 @@
package kernel

// LinuxKernelCatalogerConfig is the configuration accepted by
// NewLinuxKernelCataloger, deliberately declared apart from the constructor.
type LinuxKernelCatalogerConfig struct {
	KernelVersion string
}

View File

@ -0,0 +1,19 @@
// Package ruby is a minimal generic cataloger stub whose constructor names
// its config parameter "opts" rather than the more common "cfg".
package ruby

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// Config is the configuration type accepted by NewRubyCataloger.
type Config struct {
	Setting bool
}

// NewRubyCataloger returns a generic cataloger wired to parse; opts is not
// read here.
func NewRubyCataloger(opts Config) pkg.Cataloger {
	return generic.NewCataloger("ruby-cataloger").
		WithParserByGlobs(parse, "**/Gemfile")
}

// parse is a stub parser that always returns no packages or relationships.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,15 @@
package javascript
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// NewJavaScriptCataloger returns a generic cataloger that parses files
// matched by the **/*.js glob. It takes no configuration.
func NewJavaScriptCataloger() pkg.Cataloger {
	return generic.NewCataloger("javascript-cataloger").
		WithParserByGlobs(parse, "**/*.js")
}
// parse is the generic-cataloger parser hook for JavaScript files.
// This stub implementation always returns empty results and no error.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,17 @@
package binary
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// Parser is an empty placeholder type accepted by NewBinaryCataloger.
type Parser struct{}

// NewBinaryCataloger returns a generic cataloger that matches every file
// (glob **/*).
// NOTE(review): parser is accepted but not used anywhere in this stub —
// confirm whether the real implementation dispatches through it.
func NewBinaryCataloger(parser Parser) pkg.Cataloger {
	return generic.NewCataloger("binary-cataloger").
		WithParserByGlobs(parse, "**/*")
}
// parse is the generic-cataloger parser hook for binary files.
// This stub implementation always returns empty results and no error.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,16 @@
package rust
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/test/cargo"
)
// NewRustCataloger returns a generic cataloger that parses files matched by
// the **/Cargo.toml glob.
// NOTE(review): cfg (an externally-declared cargo.CatalogerConfig) is accepted
// but not used anywhere in this stub — confirm against the real implementation.
func NewRustCataloger(cfg cargo.CatalogerConfig) pkg.Cataloger {
	return generic.NewCataloger("rust-cataloger").
		WithParserByGlobs(parse, "**/Cargo.toml")
}
// parse is the generic-cataloger parser hook for Cargo.toml files.
// This stub implementation always returns empty results and no error.
func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -0,0 +1,19 @@
package golang
import (
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// CatalogerConfig holds user-facing configuration for the Go module cataloger.
type CatalogerConfig struct {
	// SomeOption toggles cataloger behavior; its semantics are not shown in
	// this stub — TODO confirm against the real implementation.
	SomeOption bool
}
// NewGoModuleCataloger returns a generic cataloger that parses files matched
// by the **/go.mod glob.
// NOTE(review): cfg is accepted but not used anywhere in this stub — confirm
// whether the real implementation threads it through to the parser.
func NewGoModuleCataloger(cfg CatalogerConfig) pkg.Cataloger {
	return generic.NewCataloger("go-module-cataloger").
		WithParserByGlobs(parseGoMod, "**/go.mod")
}
// parseGoMod is the generic-cataloger parser hook for go.mod files.
// This stub implementation always returns empty results and no error.
func parseGoMod(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
	return nil, nil, nil
}

View File

@ -218,7 +218,6 @@ application: # AUTO-GENERATED - application-level config keys
description: enables Syft to use the network to fill in more detailed license information description: enables Syft to use the network to fill in more detailed license information
- key: linux-kernel.catalog-modules - key: linux-kernel.catalog-modules
description: whether to catalog linux kernel modules found within lib/modules/** directories description: whether to catalog linux kernel modules found within lib/modules/** directories
default: true
- key: nix.capture-owned-files - key: nix.capture-owned-files
description: enumerate all files owned by packages found within Nix store paths description: enumerate all files owned by packages found within Nix store paths
- key: python.guess-unpinned-requirements - key: python.guess-unpinned-requirements