diff --git a/DEVELOPING.md b/DEVELOPING.md
index 8fd5b57f3..4ce1534dd 100644
--- a/DEVELOPING.md
+++ b/DEVELOPING.md
@@ -215,6 +215,24 @@ Interested in building a new cataloger? Checkout the [list of issues with the `n
If you have questions about implementing a cataloger feel free to file an issue or reach out to us [on discourse](https://anchore.com/discourse)!
+#### Documenting Cataloger Capabilities
+
+When adding a new cataloger or changing the capabilities of an existing one, you'll need to document its capabilities in `internal/capabilities/packages.yaml`. This includes:
+- What metadata types it produces
+- What package types it catalogs
+- What dependency information it provides (depth, edges, kinds)
+- Whether it extracts license information
+- How configuration affects its behavior
+
+After implementing your cataloger:
+
+1. **Write tests using the `pkgtest` helpers** - this automatically generates test observations that feed into capability documentation
+2. **Run `make generate-capabilities`** - this regenerates the `packages.yaml` file and validates your changes
+3. **Manually edit capabilities** - add the `ecosystem` field and detailed `capabilities` sections in `packages.yaml`
+
+For detailed information about the capability documentation system, see [`internal/capabilities/generate/README.md`](internal/capabilities/generate/README.md).
+
+
#### Searching for files
All catalogers are provided an instance of the [`file.Resolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these
diff --git a/Taskfile.yaml b/Taskfile.yaml
index a4a1201b4..e2166b953 100644
--- a/Taskfile.yaml
+++ b/Taskfile.yaml
@@ -521,6 +521,8 @@ tasks:
generate-capabilities:
desc: Generate the capabilities data file
cmds:
+ # this is required to update test observations; such evidence is used to update the packages.yaml
+ - "go test ./syft/pkg/..."
- "go generate ./internal/capabilities/..."
- "gofmt -s -w ./internal/capabilities"
diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go
index 652d6a40d..342733d5c 100644
--- a/cmd/syft/internal/options/catalog.go
+++ b/cmd/syft/internal/options/catalog.go
@@ -77,6 +77,8 @@ func DefaultCatalog() Catalog {
Package: defaultPackageConfig(),
License: defaultLicenseConfig(),
LinuxKernel: defaultLinuxKernelConfig(),
+ JavaScript: defaultJavaScriptConfig(),
+ Python: defaultPythonConfig(),
Nix: defaultNixConfig(),
Dotnet: defaultDotnetConfig(),
Golang: defaultGolangConfig(),
diff --git a/cmd/syft/internal/options/javascript.go b/cmd/syft/internal/options/javascript.go
index 982bffa29..73019dc91 100644
--- a/cmd/syft/internal/options/javascript.go
+++ b/cmd/syft/internal/options/javascript.go
@@ -1,6 +1,9 @@
package options
-import "github.com/anchore/clio"
+import (
+ "github.com/anchore/clio"
+ "github.com/anchore/syft/syft/pkg/cataloger/javascript"
+)
type javaScriptConfig struct {
SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
@@ -12,6 +15,24 @@ var _ interface {
clio.FieldDescriber
} = (*javaScriptConfig)(nil)
+func defaultJavaScriptConfig() javaScriptConfig {
+ def := javascript.DefaultCatalogerConfig()
+ var includeDevDependencies *bool
+ if def.IncludeDevDependencies {
+ includeDevDependencies = &def.IncludeDevDependencies
+ }
+
+ var searchRemoteLicenses *bool
+ if def.SearchRemoteLicenses {
+ searchRemoteLicenses = &def.SearchRemoteLicenses
+ }
+ return javaScriptConfig{
+ NpmBaseURL: def.NPMBaseURL,
+ SearchRemoteLicenses: searchRemoteLicenses,
+ IncludeDevDependencies: includeDevDependencies,
+ }
+}
+
func (o *javaScriptConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`)
descriptions.Add(&o.NpmBaseURL, `base NPM url to use`)
diff --git a/cmd/syft/internal/options/linux_kernel.go b/cmd/syft/internal/options/linux_kernel.go
index 03f24dbf0..fab464aa6 100644
--- a/cmd/syft/internal/options/linux_kernel.go
+++ b/cmd/syft/internal/options/linux_kernel.go
@@ -1,14 +1,18 @@
package options
-import "github.com/anchore/clio"
+import (
+ "github.com/anchore/clio"
+ "github.com/anchore/syft/syft/pkg/cataloger/kernel"
+)
type linuxKernelConfig struct {
CatalogModules bool `json:"catalog-modules" yaml:"catalog-modules" mapstructure:"catalog-modules"`
}
func defaultLinuxKernelConfig() linuxKernelConfig {
+ def := kernel.DefaultLinuxKernelCatalogerConfig()
return linuxKernelConfig{
- CatalogModules: true,
+ CatalogModules: def.CatalogModules,
}
}
diff --git a/cmd/syft/internal/options/python.go b/cmd/syft/internal/options/python.go
index c645cbfcd..f18174ced 100644
--- a/cmd/syft/internal/options/python.go
+++ b/cmd/syft/internal/options/python.go
@@ -1,6 +1,9 @@
package options
-import "github.com/anchore/clio"
+import (
+ "github.com/anchore/clio"
+ "github.com/anchore/syft/syft/pkg/cataloger/python"
+)
type pythonConfig struct {
GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
@@ -10,6 +13,13 @@ var _ interface {
clio.FieldDescriber
} = (*pythonConfig)(nil)
+func defaultPythonConfig() pythonConfig {
+ def := python.DefaultCatalogerConfig()
+ return pythonConfig{
+ GuessUnpinnedRequirements: def.GuessUnpinnedRequirements,
+ }
+}
+
func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version
(e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could
diff --git a/internal/capabilities/capabilities.go b/internal/capabilities/capabilities.go
index 7d1842070..9669e6536 100644
--- a/internal/capabilities/capabilities.go
+++ b/internal/capabilities/capabilities.go
@@ -1,3 +1,4 @@
+// Package capabilities provides discovery and tracking of cataloger capabilities.
package capabilities
import (
@@ -11,6 +12,8 @@ import (
"github.com/anchore/syft/internal/task"
)
+//go:generate go run ./generate
+
//go:embed packages.yaml
var catalogersYAML []byte
diff --git a/internal/capabilities/doc.go b/internal/capabilities/doc.go
deleted file mode 100644
index 289d9e655..000000000
--- a/internal/capabilities/doc.go
+++ /dev/null
@@ -1,11 +0,0 @@
-// Package capabilities provides discovery and tracking of cataloger capabilities.
-//
-// Run 'go generate' in this directory to discover catalogers from source code and update
-// the packages.yaml file with newly discovered generic catalogers.
-//
-// The packages.yaml file is the source of truth for cataloger capabilities. It contains
-// both auto-generated metadata (cataloger names, parser functions, glob patterns) and
-// manually-edited capability descriptions (what each cataloger can discover).
-package capabilities
-
-//go:generate go run ./generate
diff --git a/internal/capabilities/generate/README.md b/internal/capabilities/generate/README.md
new file mode 100644
index 000000000..57f85b4b5
--- /dev/null
+++ b/internal/capabilities/generate/README.md
@@ -0,0 +1,1382 @@
+# Capabilities Generation System
+
+This internal tool is responsible for:
+- partially generating the `packages.yaml` file, which documents what capabilities each cataloger in syft has
+- running completeness / consistency tests of the claims from `packages.yaml` against actual test observations
+
+Syft has dozens of catalogers across many ecosystems. Each cataloger has different capabilities, such as:
+- Some provide license information, others don't
+- Some detect transitive dependencies, others only direct
+- Some capabilities depend on configuration
+
+The `packages.yaml` contains all of these capability claims.
+
+The `capabilities` generation system itself:
+1. **Discovers** cataloger information from source code using AST parsing
+2. **Extracts** metadata about parsers, detectors, and configuration from code and tests
+3. **Merges** discovered information with manually-maintained capability documentation
+4. **Validates** that the generated document is complete and in sync with the codebase
+
+**Why do this?**
+The short answer is to provide a foundation for the OSS documentation, where the source of truth for facts about the capabilities of Syft can be derived from verifiable claims from the tool itself.
+
+
+## Quick Start
+
+**To regenerate packages.yaml after code changes:**
+```bash
+go generate ./internal/capabilities
+```
+
+**To run validation of capability claims:**
+```bash
+# update test evidence
+go test ./syft/pkg/...
+
+# check claims against test evidence
+go test ./internal/capabilities/generate
+```
+
+## Data Flow
+
+```mermaid
+graph TB
+ subgraph "Source Code Inputs"
+        A1[syft/pkg/cataloger/*/<br/>cataloger.go]
+        A2[syft/pkg/cataloger/*/<br/>config.go]
+        A3[cmd/syft/internal/options/<br/>catalog.go, ecosystem.go]
+        A4[syft task factories<br/>AllCatalogers]
+    end
+
+    subgraph "Test Inputs"
+        B1[test-fixtures/<br/>test-observations.json]
+    end
+
+    subgraph "Discovery Processes"
+        C1[discover_catalogers.go<br/>AST Parse Catalogers]
+        C2[discover_cataloger_configs.go<br/>AST Parse Configs]
+        C3[discover_app_config.go<br/>AST Parse App Configs]
+        C4[discover_metadata.go<br/>Read Observations]
+        C5[cataloger_config_linking.go<br/>Link Catalogers to Configs]
+        C6[cataloger_names.go<br/>Query Task Factories]
+    end
+
+    subgraph "Discovered Data"
+        D1[Generic Catalogers<br/>name, parsers, detectors]
+        D2[Config Structs<br/>fields, app-config keys]
+        D3[App Config Fields<br/>keys, descriptions, defaults]
+        D4[Metadata Types<br/>per parser/cataloger]
+        D5[Package Types<br/>per parser/cataloger]
+        D6[Cataloger-Config Links<br/>mapping]
+        D7[Selectors<br/>tags per cataloger]
+    end
+
+    subgraph "Configuration/Overrides"
+        E1[catalogerTypeOverrides<br/>catalogerConfigOverrides<br/>catalogerConfigExceptions]
+        E2[metadataTypeCoverageExceptions<br/>packageTypeCoverageExceptions<br/>observationExceptions]
+    end
+
+    subgraph "Merge Process"
+        F1[io.go<br/>Load Existing YAML]
+        F2[merge.go<br/>Merge Logic]
+        F3[Preserve MANUAL fields<br/>Update AUTO-GENERATED]
+    end
+
+    subgraph "Output"
+        G1[packages.yaml<br/>Complete Catalog Document]
+    end
+
+    subgraph "Validation"
+        H1[completeness_test.go<br/>Comprehensive Tests]
+        H2[metadata_check.go<br/>Type Coverage]
+    end
+ end
+
+ A1 --> C1
+ A2 --> C2
+ A3 --> C3
+ A4 --> C6
+ B1 --> C4
+
+ C1 --> D1
+ C2 --> D2
+ C3 --> D3
+ C4 --> D4
+ C4 --> D5
+ C5 --> D6
+ C6 --> D7
+
+ D1 --> F2
+ D2 --> F2
+ D3 --> F2
+ D4 --> F2
+ D5 --> F2
+ D6 --> F2
+ D7 --> F2
+
+ E1 -.configure.-> F2
+ E2 -.configure.-> H1
+
+ F1 --> F3
+ F2 --> F3
+ F3 --> G1
+
+ G1 --> H1
+ G1 --> H2
+
+ style D1 fill:#e1f5ff
+ style D2 fill:#e1f5ff
+ style D3 fill:#e1f5ff
+ style D4 fill:#e1f5ff
+ style D5 fill:#e1f5ff
+ style D6 fill:#e1f5ff
+ style D7 fill:#e1f5ff
+ style G1 fill:#c8e6c9
+ style E1 fill:#fff9c4
+ style E2 fill:#fff9c4
+```
+
+### Key Data Flows
+
+1. **Cataloger Discovery**: AST parser walks `syft/pkg/cataloger/` to find `generic.NewCataloger()` calls and extract parser information
+2. **Config Discovery**: AST parser finds config structs and extracts fields with `// app-config:` annotations
+3. **App Config Discovery**: AST parser extracts ecosystem configurations from options package, including descriptions and defaults
+4. **Metadata Discovery**: JSON reader loads test observations that record what metadata/package types each parser produces
+5. **Linking**: AST analyzer connects catalogers to their config structs by examining constructor parameters
+6. **Merge**: Discovered data combines with existing YAML, preserving all manually-maintained capability sections
+7. **Validation**: Comprehensive tests ensure the output is complete and synchronized with codebase
+
+## The `packages.yaml` File
+
+### Purpose
+
+`internal/capabilities/packages.yaml` is the canonical documentation of:
+- Every cataloger in syft
+- What files/patterns each cataloger detects
+- What metadata and package types each cataloger produces
+- What capabilities each cataloger has (licenses, dependencies, etc.)
+- How configuration affects these capabilities
+
+### Structure
+
+```yaml
+# File header with usage instructions (AUTO-GENERATED)
+
+application: # AUTO-GENERATED
+ # Application-level config keys with descriptions
+ - key: golang.search-local-mod-cache-licenses
+ description: search for go package licences in the GOPATH...
+ default_value: false
+
+configs: # AUTO-GENERATED
+ # Config struct definitions
+ golang.CatalogerConfig:
+ fields:
+ - key: SearchLocalModCacheLicenses
+ description: SearchLocalModCacheLicenses enables...
+ app_key: golang.search-local-mod-cache-licenses
+
+catalogers: # Mixed AUTO-GENERATED structure, MANUAL capabilities
+ - ecosystem: golang # MANUAL
+ name: go-module-cataloger # AUTO-GENERATED
+ type: generic # AUTO-GENERATED
+ source: # AUTO-GENERATED
+ file: syft/pkg/cataloger/golang/cataloger.go
+ function: NewGoModuleBinaryCataloger
+ config: golang.CatalogerConfig # AUTO-GENERATED
+ selectors: [go, golang, ...] # AUTO-GENERATED
+ parsers: # AUTO-GENERATED structure
+ - function: parseGoMod # AUTO-GENERATED
+ detector: # AUTO-GENERATED
+ method: glob
+ criteria: ["**/go.mod"]
+ metadata_types: # AUTO-GENERATED
+ - pkg.GolangModuleEntry
+ package_types: # AUTO-GENERATED
+ - go-module
+ json_schema_types: # AUTO-GENERATED
+ - GolangModEntry
+ capabilities: # MANUAL - preserved across regeneration
+ - name: license
+ default: false
+ conditions:
+ - when: {SearchRemoteLicenses: true}
+ value: true
+ comment: fetches licenses from proxy.golang.org
+ - name: dependency.depth
+ default: [direct, indirect]
+ - name: dependency.edges
+ default: complete
+```
+
+### AUTO-GENERATED vs MANUAL Fields
+
+#### AUTO-GENERATED Fields
+These are updated on every regeneration:
+
+**Cataloger Level:**
+- `name` - cataloger identifier
+- `type` - "generic" or "custom"
+- `source.file` - source file path
+- `source.function` - constructor function name
+- `config` - linked config struct name
+- `selectors` - tags from task factories
+
+**Parser Level (generic catalogers):**
+- `function` - parser function name (as used in the generic cataloger)
+- `detector.method` - glob/path/mimetype
+- `detector.criteria` - patterns matched
+- `metadata_types` - from test-observations.json
+- `package_types` - from test-observations.json
+- `json_schema_types` - converted from metadata_types
+
+**Custom Cataloger Level:**
+- `metadata_types` - from test-observations.json
+- `package_types` - from test-observations.json
+- `json_schema_types` - converted from metadata_types
+
+**Sections:**
+- Entire `application:` section: a flat mapping of the application config keys relevant to catalogers
+- Entire `configs:` section: a flat mapping of the API-level cataloger config keys, for each cataloger (map of maps)
+
+#### MANUAL Fields
+These are preserved across regeneration and must be edited by hand:
+
+- `ecosystem` - ecosystem/language identifier (cataloger level)
+- `capabilities` - capability definitions with conditions
+- `detectors` - for custom catalogers (except binary-classifier-cataloger)
+- `conditions` on detectors - when detector is active based on config
+
+### How Regeneration Works
+
+When you run `go generate ./internal/capabilities`:
+
+1. **Loads existing YAML** into both a struct (for logic) and a node tree (for comment preservation)
+2. **Discovers all cataloger data** from source code and tests
+3. **Merges** discovered data with existing:
+ - Updates AUTO-GENERATED fields
+ - **Preserves** all MANUAL fields (capabilities, ecosystem, etc.)
+ - Adds annotations (`# AUTO-GENERATED`, `# MANUAL`) to field comments
+4. **Writes back** using the node tree to preserve all comments
+5. **Validates** the result with completeness tests
+
+> [!NOTE]
+> Don't forget to update test observation evidence with `go test ./syft/pkg/...` before regeneration.
+
+## Generation Process
+
+### High-Level Workflow
+
+```
+1. Discovery Phase
+ ├─ Parse cataloger source code (AST)
+ ├─ Find all parsers and detectors
+ ├─ Read test observations for metadata types
+ ├─ Discover config structs and fields
+ ├─ Discover app-level configurations
+ └─ Link catalogers to their configs
+
+2. Merge Phase
+ ├─ Load existing packages.yaml
+ ├─ Process each cataloger:
+ │ ├─ Update AUTO-GENERATED fields
+ │ └─ Preserve MANUAL fields
+ ├─ Add new catalogers
+ └─ Detect orphaned entries
+
+3. Write Phase
+ ├─ Update YAML node tree in-place
+ ├─ Add field annotations
+ └─ Write to disk
+
+4. Validation Phase
+ ├─ Check all catalogers present
+ ├─ Check metadata/package type coverage
+ └─ Run completeness tests
+```
+
+### Detailed Discovery Processes
+
+#### 1. Generic Cataloger Discovery (`discover_catalogers.go`)
+
+**What it finds:** catalogers using the `generic.NewCataloger()` pattern
+
+**Process:**
+1. Walk `syft/pkg/cataloger/` recursively for `.go` files
+2. Parse each file with Go AST parser (`go/ast`, `go/parser`)
+3. Find functions matching pattern: `New*Cataloger() pkg.Cataloger`
+4. Within function body, find `generic.NewCataloger(name, ...)` call
+5. Extract cataloger name from first argument
+6. Find all chained `WithParserBy*()` calls:
+ ```go
+ generic.NewCataloger("my-cataloger").
+ WithParserByGlobs(parseMyFormat, "**/*.myformat").
+ WithParserByMimeTypes(parseMyBinary, "application/x-mytype")
+ ```
+7. For each parser call:
+ - Extract parser function name (e.g., `parseMyFormat`)
+ - Extract detection method (Globs/Path/MimeTypes)
+ - Extract criteria (patterns or mime types)
+ - Resolve constant references across files if needed
+
+**Output:** `map[string]DiscoveredCataloger` with full parser information
+
+#### 2. Config Discovery (`discover_cataloger_configs.go`)
+
+**What it finds:** cataloger configuration structs
+
+**Process:**
+1. Find all `.go` files in `syft/pkg/cataloger/*/`
+2. Look for structs with "Config" in their name
+3. For each config struct:
+ - Extract struct fields
+ - Look for `// app-config: key.name` annotations in field comments
+ - Extract field descriptions from doc comments
+4. Filter results by whitelist (only configs referenced in `pkgcataloging.Config`)
+
+**Example source:**
+```go
+type CatalogerConfig struct {
+ // SearchLocalModCacheLicenses enables searching for go package licenses
+ // in the local GOPATH mod cache.
+ // app-config: golang.search-local-mod-cache-licenses
+ SearchLocalModCacheLicenses bool
+}
+```
+
+**Output:** `map[string]ConfigInfo` with field details and app-config keys
+
+#### 3. App Config Discovery (`discover_app_config.go`)
+
+**What it finds:** application-level configuration from the options package
+
+**Process:**
+1. Parse `cmd/syft/internal/options/catalog.go` to find `Catalog` struct
+2. Extract ecosystem config fields (e.g., `Golang golangConfig`)
+3. For each ecosystem:
+ - Find the config file (e.g., `golang.go`)
+ - Parse the config struct
+ - Find `DescribeFields() []FieldDescription` method
+ - Extract field descriptions from the returned descriptions
+ - Find `default*Config()` function and extract default values
+4. Build full key paths (e.g., `golang.search-local-mod-cache-licenses`)
+
+**Example source:**
+```go
+// golang.go
+type golangConfig struct {
+ SearchLocalModCacheLicenses bool `yaml:"search-local-mod-cache-licenses" ...`
+}
+
+func (c golangConfig) DescribeFields(opts ...options.DescribeFieldsOption) []options.FieldDescription {
+ return []options.FieldDescription{
+ {
+ Name: "search-local-mod-cache-licenses",
+ Description: "search for go package licences in the GOPATH...",
+ },
+ }
+}
+```
+
+**Output:** `[]AppConfigField` with keys, descriptions, and defaults
+
+#### 4. Cataloger-Config Linking (`cataloger_config_linking.go`)
+
+**What it finds:** which config struct each cataloger uses
+
+**Process:**
+1. For each discovered cataloger, find its constructor function
+2. Extract the first parameter type from the function signature
+3. Filter for types that look like configs (contain "Config")
+4. Build mapping: cataloger name → config struct name
+5. Apply manual overrides from `catalogerConfigOverrides` map
+6. Apply exceptions from `catalogerConfigExceptions` set
+
+**Example:**
+```go
+// Constructor signature:
+func NewGoModuleBinaryCataloger(cfg golang.CatalogerConfig) pkg.Cataloger
+
+// Results in link:
+"go-module-binary-cataloger" → "golang.CatalogerConfig"
+```
+
+**Output:** `map[string]string` (cataloger → config mapping)
+
+#### 5. Metadata Discovery (`discover_metadata.go`)
+
+**What it finds:** metadata types and package types each parser produces
+
+**Process:**
+1. Find all `test-fixtures/test-observations.json` files
+2. Parse JSON which contains:
+ ```json
+ {
+ "package": "golang",
+ "parsers": {
+ "parseGoMod": {
+ "metadata_types": ["pkg.GolangModuleEntry"],
+ "package_types": ["go-module"]
+ }
+ },
+ "catalogers": {
+ "linux-kernel-cataloger": {
+ "metadata_types": ["pkg.LinuxKernel"],
+ "package_types": ["linux-kernel"]
+ }
+ }
+ }
+ ```
+3. Build index by package name and parser function
+4. Apply to discovered catalogers:
+ - Parser-level observations → attached to specific parsers
+ - Cataloger-level observations → for custom catalogers
+5. Convert metadata types to JSON schema types using `packagemetadata` registry
+
+**Why this exists:** the AST parser can't determine what types a parser produces just by reading code. This information comes from test execution.
+
+**Output:** populated `MetadataTypes` and `PackageTypes` on catalogers/parsers
+
+## Input Sources
+
+### 1. Source Code Inputs
+
+#### Cataloger Constructors (`syft/pkg/cataloger/*/cataloger.go`)
+
+**What's extracted:**
+- Cataloger names
+- Parser function names
+- Detection methods (glob, path, mimetype)
+- Detection criteria (patterns)
+
+**Example:**
+```go
+func NewGoModuleBinaryCataloger() pkg.Cataloger {
+ return generic.NewCataloger("go-module-binary-cataloger").
+ WithParserByGlobs(parseGoBin, "**/go.mod").
+ WithParserByMimeTypes(parseGoArchive, "application/x-archive")
+}
+```
+
+#### Config Structs (`syft/pkg/cataloger/*/config.go`)
+
+**What's extracted:**
+- Config struct fields
+- Field types
+- Field descriptions from comments
+- App-config key mappings from annotations
+
+**Example:**
+```go
+type CatalogerConfig struct {
+ // SearchRemoteLicenses enables downloading go package licenses from the upstream
+ // go proxy (typically proxy.golang.org).
+ // app-config: golang.search-remote-licenses
+ SearchRemoteLicenses bool
+
+ // LocalModCacheDir specifies the location of the local go module cache directory.
+ // When not set, syft will attempt to discover the GOPATH env or default to $HOME/go.
+ // app-config: golang.local-mod-cache-dir
+ LocalModCacheDir string
+}
+```
+
+#### Options Package (`cmd/syft/internal/options/`)
+
+**What's extracted:**
+- Ecosystem config structs
+- App-level configuration keys
+- Field descriptions from `DescribeFields()` methods
+- Default values from `default*Config()` functions
+
+**Example:**
+```go
+// catalog.go
+type Catalog struct {
+ Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"`
+}
+
+// golang.go
+func (c golangConfig) DescribeFields(opts ...options.DescribeFieldsOption) []options.FieldDescription {
+ return []options.FieldDescription{
+ {
+ Name: "search-remote-licenses",
+ Description: "search for go package licences by retrieving the package from a network proxy",
+ },
+ }
+}
+```
+
+### 2. Test-Driven Inputs
+
+#### test-observations.json Files
+
+**Location:** `syft/pkg/cataloger/*/test-fixtures/test-observations.json`
+
+**Purpose:** records what metadata and package types each parser produces during test execution
+
+**How they're generated:** automatically by the `pkgtest.CatalogTester` helpers when tests run
+
+**Example test code:**
+```go
+func TestGoModuleCataloger(t *testing.T) {
+ tester := NewGoModuleBinaryCataloger()
+
+ pkgtest.NewCatalogTester().
+ FromDirectory(t, "test-fixtures/go-module-fixture").
+ TestCataloger(t, tester) // Auto-writes observations on first run
+}
+```
+
+**Example observations file:**
+```json
+{
+ "package": "golang",
+ "parsers": {
+ "parseGoMod": {
+ "metadata_types": ["pkg.GolangModuleEntry"],
+ "package_types": ["go-module"]
+ },
+ "parseGoSum": {
+ "metadata_types": ["pkg.GolangModuleEntry"],
+ "package_types": ["go-module"]
+ }
+ }
+}
+```
+
+**Why this exists:**
+- Metadata types can't be determined from AST parsing alone
+- Ensures tests use the pkgtest helpers (enforced by `TestCatalogersHaveTestObservations`)
+- Provides test coverage visibility
+
+### 3. Syft Runtime Inputs
+
+#### Task Factories (`allPackageCatalogerInfo()`)
+
+**What's extracted:**
+- Canonical list of all catalogers (ensures sync with binary)
+- Selectors (tags) for each cataloger
+
+**Example:**
+```go
+info := cataloger.CatalogerInfo{
+ Name: "go-module-binary-cataloger",
+ Selectors: []string{"go", "golang", "binary", "language", "package"},
+}
+```
+
+### 4. Global Configuration Variables
+
+#### Merge Logic Overrides (`merge.go`)
+
+```go
+// catalogerTypeOverrides forces a specific cataloger type when discovery gets it wrong
+var catalogerTypeOverrides = map[string]string{
+ "java-archive-cataloger": "custom", // technically generic but treated as custom
+}
+
+// catalogerConfigExceptions lists catalogers that should NOT have config linked
+var catalogerConfigExceptions = strset.New(
+ "binary-classifier-cataloger",
+)
+
+// catalogerConfigOverrides manually specifies config when linking fails
+var catalogerConfigOverrides = map[string]string{
+ "dotnet-portable-executable-cataloger": "dotnet.CatalogerConfig",
+ "nix-store-cataloger": "nix.Config",
+}
+```
+
+**When to update:**
+- Add to `catalogerTypeOverrides` when a cataloger's type is misdetected
+- Add to `catalogerConfigExceptions` when a cataloger shouldn't have config
+- Add to `catalogerConfigOverrides` when automatic config linking fails
+
+#### Completeness Test Configuration (`completeness_test.go`)
+
+```go
+// requireParserObservations controls observation validation strictness
+// - true: fail if ANY parser is missing observations (strict)
+// - false: only check custom catalogers (lenient, current mode)
+const requireParserObservations = false
+
+// metadataTypeCoverageExceptions lists metadata types allowed to not be documented
+var metadataTypeCoverageExceptions = strset.New(
+ reflect.TypeOf(pkg.MicrosoftKbPatch{}).Name(),
+)
+
+// packageTypeCoverageExceptions lists package types allowed to not be documented
+var packageTypeCoverageExceptions = strset.New(
+ string(pkg.JenkinsPluginPkg),
+ string(pkg.KbPkg),
+)
+
+// observationExceptions maps cataloger/parser names to observation types to skip
+// - nil value: skip ALL observation checks for this cataloger/parser
+// - set value: skip only specified observation types
+var observationExceptions = map[string]*strset.Set{
+ "graalvm-native-image-cataloger": nil, // skip all checks
+ "linux-kernel-cataloger": strset.New("relationships"), // skip only relationships
+}
+```
+
+**When to update:**
+- Add to exceptions when a type is intentionally not documented
+- Add to `observationExceptions` when a cataloger lacks reliable test fixtures
+- Set `requireParserObservations = true` when ready to enforce full parser coverage
+
+## Completeness Tests
+
+### Purpose
+
+The `completeness_test.go` file ensures `packages.yaml` stays in perfect sync with the codebase. These tests catch:
+- New catalogers that haven't been documented
+- Orphaned cataloger entries (cataloger was removed but YAML wasn't updated)
+- Missing metadata/package type documentation
+- Invalid capability field references
+- Catalogers not using test helpers
+
+### Test Categories
+
+#### 1. Synchronization Tests
+
+**`TestCatalogersInSync`**
+- Ensures all catalogers from `syft cataloger list` appear in YAML
+- Ensures all catalogers in YAML exist in the binary
+- Ensures all capabilities sections are filled (no TODOs/nulls)
+
+**Failure means:** you added/removed a cataloger but didn't regenerate packages.yaml
+
+**Fix:** run `go generate ./internal/capabilities`
+
+---
+
+**`TestCapabilitiesAreUpToDate`**
+- Runs only in CI
+- Ensures regeneration succeeds
+- Ensures generated file has no uncommitted changes
+
+**Failure means:** packages.yaml wasn't regenerated after code changes
+
+**Fix:** run `go generate ./internal/capabilities` and commit changes
+
+#### 2. Coverage Tests
+
+**`TestPackageTypeCoverage`**
+- Ensures all types in `pkg.AllPkgs` are documented in some cataloger
+- Allows exceptions via `packageTypeCoverageExceptions`
+
+**Failure means:** you added a new package type but no cataloger documents it
+
+**Fix:** either add a cataloger entry or add to exceptions if intentionally not supported
+
+---
+
+**`TestMetadataTypeCoverage`**
+- Ensures all types in `packagemetadata.AllTypes()` are documented
+- Allows exceptions via `metadataTypeCoverageExceptions`
+
+**Failure means:** you added a new metadata type but no cataloger produces it
+
+**Fix:** either add metadata_types to a cataloger or add to exceptions
+
+---
+
+**`TestMetadataTypesHaveJSONSchemaTypes`**
+- Ensures metadata_types and json_schema_types are synchronized
+- Validates every metadata type has a corresponding json_schema_type with correct conversion
+- Checks both cataloger-level and parser-level types
+
+**Failure means:** metadata_types and json_schema_types are out of sync
+
+**Fix:** run `go generate ./internal/capabilities` to regenerate synchronized types
+
+#### 3. Structure Tests
+
+**`TestCatalogerStructure`**
+- Validates generic vs custom cataloger structure rules:
+ - Generic catalogers must have parsers, no cataloger-level capabilities
+ - Custom catalogers must have detectors and cataloger-level capabilities
+- Ensures ecosystem is always set
+
+**Failure means:** cataloger structure doesn't follow conventions
+
+**Fix:** correct the cataloger structure in packages.yaml
+
+---
+
+**`TestCatalogerDataQuality`**
+- Checks for duplicate cataloger names
+- Validates detector formats for custom catalogers
+- Checks for duplicate parser functions within catalogers
+
+**Failure means:** data integrity issue in packages.yaml
+
+**Fix:** remove duplicates or fix detector definitions
+
+#### 4. Config Tests
+
+**`TestConfigCompleteness`**
+- Ensures all configs in the `configs:` section are referenced by a cataloger
+- Ensures all cataloger config references exist
+- Ensures all app-key references exist in `application:` section
+
+**Failure means:** orphaned config or broken reference
+
+**Fix:** remove unused configs or add missing entries
+
+---
+
+**`TestAppConfigFieldsHaveDescriptions`**
+- Ensures all application config fields have descriptions
+
+**Failure means:** missing `DescribeFields()` entry
+
+**Fix:** add description in the ecosystem's `DescribeFields()` method
+
+---
+
+**`TestAppConfigKeyFormat`**
+- Validates config keys follow format: `ecosystem.field-name`
+- Ensures kebab-case (no underscores or spaces)
+
+**Failure means:** malformed config key
+
+**Fix:** rename the config key to follow conventions
+
+#### 5. Capability Tests
+
+**`TestCapabilityConfigFieldReferences`**
+- Validates that config fields referenced in capability conditions actually exist
+- Checks both cataloger-level and parser-level capabilities
+
+**Example failure:**
+```yaml
+capabilities:
+ - name: license
+ conditions:
+ - when: {NonExistentField: true} # ← this field doesn't exist in config struct
+ value: true
+```
+
+**Fix:** correct the field name to match the actual config struct
+
+---
+
+**`TestCapabilityFieldNaming`**
+- Ensures capability field names follow known patterns:
+ - `license`
+ - `dependency.depth`
+ - `dependency.edges`
+ - `dependency.kinds`
+ - `package_manager.files.listing`
+ - `package_manager.files.digests`
+ - `package_manager.package_integrity_hash`
+
+**Failure means:** typo in capability field name
+
+**Fix:** correct the typo or add new field to known list
+
+---
+
+**`TestCapabilityValueTypes`**
+- Validates capability values match expected types:
+ - Boolean fields: `license`, `package_manager.*`
+ - Array fields: `dependency.depth`, `dependency.kinds`
+ - String fields: `dependency.edges`
+
+**Example failure:**
+```yaml
+capabilities:
+ - name: license
+ default: "yes" # ← should be boolean true/false
+```
+
+**Fix:** use correct type for the field
+
+---
+
+**`TestCapabilityEvidenceFieldReferences`**
+- Validates that evidence references point to real struct fields
+- Uses AST parsing to verify field paths exist
+
+**Example:**
+```yaml
+capabilities:
+ - name: package_manager.files.digests
+ default: true
+ evidence:
+ - AlpmDBEntry.Files[].Digests # ← validates this path exists
+```
+
+**Failure means:** typo in evidence reference or struct was changed
+
+**Fix:** correct the evidence reference or update after struct changes
+
+#### 6. Observations Test
+
+**`TestCatalogersHaveTestObservations`**
+- Ensures all custom catalogers have test observations
+- Optionally checks parsers (controlled by `requireParserObservations`)
+- Allows exceptions via `observationExceptions`
+
+**Failure means:** cataloger tests aren't using pkgtest helpers
+
+**Fix:** update tests to use `pkgtest.CatalogTester`:
+```go
+pkgtest.NewCatalogTester().
+ FromDirectory(t, "test-fixtures/my-fixture").
+ TestCataloger(t, myCataloger)
+```
+
+### How to Fix Test Failures
+
+#### General Approach
+1. **Read the test error message** - it usually tells you exactly what's wrong
+2. **Check if regeneration needed** - most failures are fixed by: `go generate ./internal/capabilities`
+3. **Check for code/test changes** - did you add/modify a cataloger?
+4. **Consider exceptions** - is this intentionally unsupported?
+
+#### Common Failures and Fixes
+
+| Failure | Most Likely Cause | Fix |
+|---------|------------------|-----|
+| Cataloger not in YAML | Added new cataloger | Regenerate |
+| Orphaned YAML entry | Removed cataloger | Regenerate |
+| Missing metadata type | Added type but no test observations | Add pkgtest usage or exception |
+| Missing observations | Test not using pkgtest | Update test to use `CatalogTester` |
+| Config field reference | Typo in capability condition | Fix field name in YAML |
+| Incomplete capabilities | Missing capability definition | Add capabilities section to YAML |
+
+## Manual Maintenance
+
+### What Requires Manual Editing
+
+These fields in `packages.yaml` are **MANUAL** and must be maintained by hand:
+
+#### 1. Ecosystem Field (Cataloger Level)
+```yaml
+catalogers:
+ - ecosystem: golang # MANUAL - identify the ecosystem
+```
+
+**Guidelines:** use the ecosystem/language name (golang, python, java, rust, etc.)
+
+#### 2. Capabilities Sections
+
+**For Generic Catalogers** (parser level):
+```yaml
+parsers:
+ - function: parseGoMod
+ capabilities: # MANUAL
+ - name: license
+ default: false
+ conditions:
+ - when: {SearchRemoteLicenses: true}
+ value: true
+ comment: fetches licenses from proxy.golang.org
+ - name: dependency.depth
+ default: [direct, indirect]
+ - name: dependency.edges
+ default: complete
+```
+
+**For Custom Catalogers** (cataloger level):
+```yaml
+catalogers:
+ - name: linux-kernel-cataloger
+ type: custom
+ capabilities: # MANUAL
+ - name: license
+ default: true
+```
+
+#### 3. Detectors for Custom Catalogers
+
+**For most custom catalogers:**
+```yaml
+detectors: # MANUAL
+ - method: glob
+ criteria:
+ - '**/lib/modules/**/modules.builtin'
+ comment: kernel modules directory
+```
+
+**Exception:** `binary-classifier-cataloger` has AUTO-GENERATED detectors extracted from source
+
+#### 4. Detector Conditions
+
+When a detector should only be active with certain configuration:
+```yaml
+detectors:
+ - method: glob
+ criteria: ['**/*.zip']
+ conditions: # MANUAL
+ - when: {IncludeZipFiles: true}
+ comment: ZIP detection requires explicit config
+```
+
+### Capabilities Format and Guidelines
+
+#### Standard Capability Fields
+
+**Boolean Fields:**
+```yaml
+- name: license
+ default: true # always available
+ # OR
+ default: false # never available
+ # OR
+ default: false
+ conditions:
+ - when: {SearchRemoteLicenses: true}
+ value: true
+ comment: requires network access to fetch licenses
+```
+
+**Array Fields (dependency.depth):**
+```yaml
+- name: dependency.depth
+ default: [direct] # only immediate dependencies
+ # OR
+ default: [direct, indirect] # full transitive closure
+ # OR
+ default: [] # no dependency information
+```
+
+**String Fields (dependency.edges):**
+```yaml
+- name: dependency.edges
+ default: "" # dependencies found but no edges between them
+ # OR
+ default: flat # single level of dependencies with edges to root only
+ # OR
+ default: reduced # transitive reduction (redundant edges removed)
+ # OR
+ default: complete # all relationships with accurate direct/indirect edges
+```
+
+**Array Fields (dependency.kinds):**
+```yaml
+- name: dependency.kinds
+ default: [runtime] # production dependencies only
+ # OR
+ default: [runtime, dev] # production and development
+ # OR
+ default: [runtime, dev, build, test] # all dependency types
+```
+
+#### Using Conditions
+
+Conditions allow capabilities to vary based on configuration values:
+
+```yaml
+capabilities:
+ - name: license
+ default: false
+ conditions:
+ - when: {SearchLocalModCacheLicenses: true}
+ value: true
+ comment: searches for licenses in GOPATH mod cache
+ - when: {SearchRemoteLicenses: true}
+ value: true
+ comment: fetches licenses from proxy.golang.org
+ comment: license scanning requires configuration
+```
+
+**Rules:**
+- Conditions are evaluated in array order (first match wins)
+- Multiple fields WITHIN a `when` clause use AND logic (all must match)
+- Multiple conditions in the array use OR logic (first matching condition)
+- If no conditions match, the `default` value is used
+
+#### Adding Evidence
+
+Evidence documents which struct fields provide the capability:
+
+```yaml
+- name: package_manager.files.listing
+ default: true
+ evidence:
+ - AlpmDBEntry.Files
+ comment: file listings stored in Files array
+```
+
+**For nested fields:**
+```yaml
+evidence:
+ - CondaMetaPackage.PathsData.Paths
+```
+
+**For array element fields:**
+```yaml
+evidence:
+ - AlpmDBEntry.Files[].Digests
+```
+
+### Best Practices
+
+1. **Be specific in comments:** explain WHY, not just WHAT
+2. **Document conditions clearly:** explain what configuration enables the capability
+3. **Use evidence references:** helps verify capabilities are accurate
+4. **Test after edits:** run `go test ./internal/capabilities/generate` to validate
+
+## Development Workflows
+
+### Adding a New Cataloger
+
+#### If Using `generic.NewCataloger()`:
+
+**What happens automatically:**
+1. Generator discovers the cataloger via AST parsing
+2. Extracts parsers, detectors, and patterns
+3. Adds entry to packages.yaml with structure
+4. Links to config (if constructor has config parameter)
+5. Extracts metadata types from test-observations.json (if test uses pkgtest)
+
+**What you must do manually:**
+1. Set the `ecosystem` field in packages.yaml
+2. Add `capabilities` sections to each parser
+3. Run `go generate ./internal/capabilities`
+4. Commit the updated packages.yaml
+
+**Example workflow:**
+```bash
+# 1. Write cataloger code
+vim syft/pkg/cataloger/mynew/cataloger.go
+
+# 2. Write tests using pkgtest (generates observations)
+vim syft/pkg/cataloger/mynew/cataloger_test.go
+
+# 3. Run tests to generate observations
+go test ./syft/pkg/cataloger/mynew
+
+# 4. Regenerate packages.yaml
+go generate ./internal/capabilities
+
+# 5. Edit packages.yaml manually
+vim internal/capabilities/packages.yaml
+# - Set ecosystem field
+# - Add capabilities sections
+
+# 6. Validate
+go test ./internal/capabilities/generate
+
+# 7. Commit
+git add internal/capabilities/packages.yaml
+git add syft/pkg/cataloger/mynew/test-fixtures/test-observations.json
+git commit
+```
+
+#### If Writing a Custom Cataloger:
+
+**What happens automatically:**
+1. Generator creates entry with name and type
+2. Extracts metadata types from test-observations.json
+
+**What you must do manually:**
+1. Set `ecosystem`
+2. Add `detectors` array with detection methods
+3. Add `capabilities` section (cataloger level, not parser level)
+4. Run `go generate ./internal/capabilities`
+
+### Modifying an Existing Cataloger
+
+#### If Changing Parser Detection Patterns:
+
+**Impact:** AUTO-GENERATED field, automatically updated
+
+**Workflow:**
+```bash
+# 1. Change the code
+vim syft/pkg/cataloger/something/cataloger.go
+
+# 2. Regenerate
+go generate ./internal/capabilities
+
+# 3. Review changes
+git diff internal/capabilities/packages.yaml
+
+# 4. Commit
+git add internal/capabilities/packages.yaml
+git commit
+```
+
+#### If Changing Metadata Type:
+
+**Impact:** AUTO-GENERATED field, updated via test observations
+
+**Workflow:**
+```bash
+# 1. Change the code
+vim syft/pkg/cataloger/something/parser.go
+
+# 2. Update tests (if needed)
+vim syft/pkg/cataloger/something/parser_test.go
+
+# 3. Run tests to update observations
+go test ./syft/pkg/cataloger/something
+
+# 4. Regenerate
+go generate ./internal/capabilities
+
+# 5. Commit
+git add internal/capabilities/packages.yaml
+git add syft/pkg/cataloger/something/test-fixtures/test-observations.json
+git commit
+```
+
+#### If Changing Capabilities:
+
+**Impact:** MANUAL field, preserved across regeneration
+
+**Workflow:**
+```bash
+# 1. Edit packages.yaml directly
+vim internal/capabilities/packages.yaml
+
+# 2. Validate
+go test ./internal/capabilities/generate
+
+# 3. Commit
+git commit internal/capabilities/packages.yaml
+```
+
+### Adding New Capability Fields
+
+If you need to add a completely new capability field (e.g., `package_manager.build_tool_info`):
+
+**Steps:**
+1. Add field name to known fields in `TestCapabilityFieldNaming` (completeness_test.go)
+2. Add value type validation to `validateCapabilityValueType()` (completeness_test.go)
+3. Update file header documentation in packages.yaml
+4. Add the field to relevant catalogers in packages.yaml
+5. Update any runtime code that consumes capabilities
+
+### When to Update Exceptions
+
+#### Add to `catalogerTypeOverrides`:
+- Discovery incorrectly classifies a cataloger's type
+- Example: cataloger uses generic framework but behaves like custom
+
+#### Add to `catalogerConfigExceptions`:
+- Cataloger should not have config linked
+- Example: simple catalogers with no configuration
+
+#### Add to `catalogerConfigOverrides`:
+- Automatic config linking fails
+- Cataloger in a subpackage or unusual structure
+- Example: dotnet catalogers split across multiple packages
+
+#### Add to `metadataTypeCoverageExceptions`:
+- Metadata type is deprecated or intentionally unused
+- Example: `MicrosoftKbPatch` (special case type)
+
+#### Add to `packageTypeCoverageExceptions`:
+- Package type is deprecated or special case
+- Example: `JenkinsPluginPkg`, `KbPkg`
+
+#### Add to `observationExceptions`:
+- Cataloger lacks reliable test fixtures (e.g., requires specific binaries)
+- Cataloger produces relationships but they're not standard dependencies
+- Example: `graalvm-native-image-cataloger` (requires native images)
+
+## File Inventory
+
+### Core Generation
+
+- **`main.go`**: entry point, orchestrates regeneration, prints status messages
+- **`merge.go`**: core merging logic, preserves manual sections while updating auto-generated
+- **`io.go`**: YAML reading/writing with comment preservation using gopkg.in/yaml.v3
+
+### Discovery
+
+- **`discover_catalogers.go`**: AST parsing to discover generic catalogers and parsers from source code
+- **`discover_cataloger_configs.go`**: AST parsing to discover cataloger config structs
+- **`discover_app_config.go`**: AST parsing to discover application-level config from options package
+- **`cataloger_config_linking.go`**: links catalogers to config structs by analyzing constructors
+- **`discover_metadata.go`**: reads test-observations.json files to get metadata/package types
+
+### Validation & Utilities
+
+- **`completeness_test.go`**: comprehensive test suite ensuring packages.yaml is complete and synced
+- **`cataloger_names.go`**: helper to get all cataloger names from syft task factories
+- **`metadata_check.go`**: validates metadata and package type coverage
+
+### Tests
+
+- **`config_discovery_test.go`**: tests for config discovery
+- **`cataloger_config_linking_test.go`**: tests for config linking
+- **`detector_validation_test.go`**: tests for detector validation
+- **`merge_test.go`**: tests for merge logic
+
+## Troubleshooting
+
+### "Cataloger X not found in packages.yaml"
+
+**Cause:** you added a new cataloger but didn't regenerate packages.yaml
+
+**Fix:**
+```bash
+go generate ./internal/capabilities
+```
+
+### "Cataloger X in YAML but not in binary"
+
+**Cause:** you removed a cataloger but didn't regenerate
+
+**Fix:**
+```bash
+go generate ./internal/capabilities
+# Review the diff - the cataloger entry should be removed
+```
+
+### "Metadata type X not represented in any cataloger"
+
+**Cause:** you added a new metadata type but:
+- No cataloger produces it yet, OR
+- Tests don't use pkgtest helpers (so observations aren't generated)
+
+**Fix Option 1 - Add test observations:**
+```go
+// Update test to use pkgtest
+pkgtest.NewCatalogTester().
+ FromDirectory(t, "test-fixtures/my-fixture").
+ TestCataloger(t, myCataloger)
+
+// Run tests
+go test ./syft/pkg/cataloger/mypackage
+
+// Regenerate
+go generate ./internal/capabilities
+```
+
+**Fix Option 2 - Add exception (if intentionally unused):**
+```go
+// completeness_test.go
+var metadataTypeCoverageExceptions = strset.New(
+ reflect.TypeOf(pkg.MyNewType{}).Name(),
+)
+```
+
+### "Parser X has no test observations"
+
+**Cause:** test doesn't use pkgtest helpers
+
+**Fix:**
+```go
+// Before:
+func TestMyParser(t *testing.T) {
+ // manual test code
+}
+
+// After:
+func TestMyParser(t *testing.T) {
+ cataloger := NewMyCataloger()
+ pkgtest.NewCatalogTester().
+ FromDirectory(t, "test-fixtures/my-fixture").
+ TestCataloger(t, cataloger)
+}
+```
+
+### "Config field X not found in struct Y"
+
+**Cause:** capability condition references a non-existent config field
+
+**Fix:** edit packages.yaml and correct the field name:
+```yaml
+# Before:
+conditions:
+ - when: {SerachRemoteLicenses: true} # typo!
+
+# After:
+conditions:
+ - when: {SearchRemoteLicenses: true}
+```
+
+### "Evidence field X.Y not found in struct X"
+
+**Cause:**
+- Typo in evidence reference, OR
+- Struct was refactored and field moved/renamed
+
+**Fix:** edit packages.yaml and correct the evidence reference:
+```yaml
+# Before:
+evidence:
+ - AlpmDBEntry.FileListing # wrong field name
+
+# After:
+evidence:
+ - AlpmDBEntry.Files
+```
+
+### "packages.yaml has uncommitted changes after regeneration"
+
+**Cause:** packages.yaml is out of date (usually caught in CI)
+
+**Fix:**
+```bash
+go generate ./internal/capabilities
+git add internal/capabilities/packages.yaml
+git commit -m "chore: regenerate capabilities"
+```
+
+### Generator Fails with "struct X not found"
+
+**Cause:** config linking trying to link to a non-existent struct
+
+**Fix Option 1 - Add override:**
+```go
+// merge.go
+var catalogerConfigOverrides = map[string]string{
+ "my-cataloger": "mypackage.MyConfig",
+}
+```
+
+**Fix Option 2 - Add exception:**
+```go
+// merge.go
+var catalogerConfigExceptions = strset.New(
+ "my-cataloger", // doesn't use config
+)
+```
+
+### "Parser capabilities must be defined"
+
+**Cause:** parser in packages.yaml has no capabilities section
+
+**Fix:** add capabilities to the parser:
+```yaml
+parsers:
+ - function: parseMyFormat
+ capabilities:
+ - name: license
+ default: false
+ - name: dependency.depth
+ default: []
+ # ... (add all required capability fields)
+```
+
+### Understanding Error Messages
+
+Most test failures include detailed guidance. Look for:
+- **List of missing items:** tells you exactly what to add/remove
+- **Suggestions:** usually includes the command to fix (e.g., "Run 'go generate ./internal/capabilities'")
+- **File locations:** tells you which file to edit
+
+**General debugging approach:**
+1. Read the full error message
+2. Check if it's fixed by regeneration
+3. Check for recent code/test changes
+4. Consider if it should be an exception
+5. Ask for help if still stuck (include full error message)
+
+---
+
+## Questions or Issues?
+
+If you encounter problems not covered here:
+1. Check test error messages (they're usually quite helpful)
+2. Look at recent commits for examples of similar changes
+3. Ask in the team chat with the full error message
diff --git a/internal/capabilities/generate/cataloger_config_linking.go b/internal/capabilities/generate/cataloger_config_linking.go
index b9dadc698..41b5b5db8 100644
--- a/internal/capabilities/generate/cataloger_config_linking.go
+++ b/internal/capabilities/generate/cataloger_config_linking.go
@@ -1,3 +1,4 @@
+// this file links catalogers to their configuration structs by analyzing constructor function signatures to determine which config struct each cataloger uses.
package main
import (
@@ -16,8 +17,18 @@ import (
// Returns empty string for catalogers that don't take a config parameter.
func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) {
catalogerRoot := filepath.Join(repoRoot, "syft", "pkg", "cataloger")
+ return LinkCatalogersToConfigsFromPath(catalogerRoot, repoRoot)
+}
- // find all .go files under syft/pkg/cataloger/ recursively
+// LinkCatalogersToConfigsFromPath analyzes cataloger constructor functions in the specified directory
+// to determine which config struct each cataloger uses. This is the parameterized version that allows
+// testing with custom fixture directories.
+// Returns a map where key is the cataloger name (e.g., "go-module-binary-cataloger")
+// and value is the config struct reference (e.g., "golang.CatalogerConfig").
+// Returns empty string for catalogers that don't take a config parameter.
+// The baseRoot parameter is used for relative path calculation to determine package names.
+func LinkCatalogersToConfigsFromPath(catalogerRoot, baseRoot string) (map[string]string, error) {
+ // find all .go files under the cataloger root recursively
var files []string
err := filepath.Walk(catalogerRoot, func(path string, info os.FileInfo, err error) error {
if err != nil {
@@ -35,7 +46,7 @@ func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) {
linkages := make(map[string]string)
for _, file := range files {
- links, err := linkCatalogersInFile(file, repoRoot)
+ links, err := linkCatalogersInFile(file, baseRoot)
if err != nil {
return nil, fmt.Errorf("failed to parse %s: %w", file, err)
}
@@ -324,7 +335,5 @@ func looksLikeConfigType(typeName string) bool {
structName := parts[len(parts)-1]
// check for common config patterns
- return strings.Contains(structName, "Config") ||
- strings.HasSuffix(structName, "Config") ||
- strings.HasPrefix(structName, "Config")
+ return strings.Contains(structName, "Config")
}
diff --git a/internal/capabilities/generate/cataloger_config_linking_test.go b/internal/capabilities/generate/cataloger_config_linking_test.go
index 22621a8c0..aae014686 100644
--- a/internal/capabilities/generate/cataloger_config_linking_test.go
+++ b/internal/capabilities/generate/cataloger_config_linking_test.go
@@ -1,12 +1,20 @@
package main
import (
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
func TestLinkCatalogersToConfigs(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping integration test in short mode")
+ }
+
repoRoot, err := RepoRoot()
require.NoError(t, err)
@@ -124,7 +132,107 @@ func TestLinkCatalogersToConfigs(t *testing.T) {
require.GreaterOrEqual(t, len(withConfig), 6, "should find at least 6 catalogers with configs")
}
+func TestLinkCatalogersToConfigsFromPath(t *testing.T) {
+ tests := []struct {
+ name string
+ fixturePath string
+ expectedLinkages map[string]string
+ wantErr require.ErrorAssertionFunc
+ }{
+ {
+ name: "simple generic cataloger with local config",
+ fixturePath: "simple-generic-cataloger",
+ expectedLinkages: map[string]string{
+ "go-module-cataloger": "golang.CatalogerConfig",
+ },
+ },
+ {
+ name: "cataloger name from constant",
+ fixturePath: "cataloger-with-constant",
+ expectedLinkages: map[string]string{
+ "python-package-cataloger": "python.CatalogerConfig",
+ },
+ },
+ {
+ name: "custom cataloger with Name() in same file",
+ fixturePath: "custom-cataloger-same-file",
+ expectedLinkages: map[string]string{
+ "java-pom-cataloger": "java.ArchiveCatalogerConfig",
+ },
+ },
+ {
+ name: "custom cataloger with Name() in different file - not detected",
+ fixturePath: "custom-cataloger-different-file",
+ expectedLinkages: map[string]string{
+ // empty - current limitation, cannot detect cross-file Names
+ },
+ },
+ {
+ name: "cataloger without config parameter",
+ fixturePath: "no-config-cataloger",
+ expectedLinkages: map[string]string{
+ "javascript-cataloger": "", // empty string means no config
+ },
+ },
+ {
+ name: "imported config type",
+ fixturePath: "imported-config-type",
+ expectedLinkages: map[string]string{
+ "linux-kernel-cataloger": "kernel.LinuxKernelCatalogerConfig",
+ },
+ },
+ {
+ name: "non-config first parameter",
+ fixturePath: "non-config-first-param",
+ expectedLinkages: map[string]string{
+ "binary-cataloger": "", // Parser not a config type
+ },
+ },
+ {
+ name: "conflicting cataloger names",
+ fixturePath: "conflicting-names",
+ wantErr: require.Error,
+ },
+ {
+ name: "mixed naming patterns",
+ fixturePath: "mixed-naming-patterns",
+ expectedLinkages: map[string]string{
+ "ruby-cataloger": "ruby.Config",
+ },
+ },
+ {
+ name: "selector expression config",
+ fixturePath: "selector-expression-config",
+ expectedLinkages: map[string]string{
+ "rust-cataloger": "cargo.CatalogerConfig",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if tt.wantErr == nil {
+ tt.wantErr = require.NoError
+ }
+
+ fixtureDir := filepath.Join("test-fixtures", "config-linking", tt.fixturePath)
+ linkages, err := LinkCatalogersToConfigsFromPath(fixtureDir, fixtureDir)
+ tt.wantErr(t, err)
+
+ if err != nil {
+ return
+ }
+
+ require.Equal(t, tt.expectedLinkages, linkages)
+ })
+ }
+}
+
func TestExtractConfigTypeName(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping integration test in short mode")
+ }
+
tests := []struct {
name string
catalogerName string
@@ -240,3 +348,162 @@ func TestLooksLikeConfigType(t *testing.T) {
})
}
}
+
+func TestExtractReceiverTypeName(t *testing.T) {
+ tests := []struct {
+ name string
+ receiver string // receiver code snippet
+ want string
+ }{
+ {
+ name: "value receiver",
+ receiver: "func (c Cataloger) Name() string { return \"\" }",
+ want: "Cataloger",
+ },
+ {
+ name: "pointer receiver",
+ receiver: "func (c *Cataloger) Name() string { return \"\" }",
+ want: "Cataloger",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // parse the function to get the receiver type
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, "", "package test\n"+tt.receiver, 0)
+ require.NoError(t, err)
+
+ // extract the function declaration
+ require.Len(t, file.Decls, 1)
+ funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
+ require.True(t, ok)
+
+ // get receiver type
+ var recvType ast.Expr
+ if funcDecl.Recv != nil && len(funcDecl.Recv.List) > 0 {
+ recvType = funcDecl.Recv.List[0].Type
+ }
+
+ got := extractReceiverTypeName(recvType)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractConfigTypeNameHelper(t *testing.T) {
+ tests := []struct {
+ name string
+ funcSig string // function signature with parameter
+ localPackageName string
+ want string
+ }{
+ {
+ name: "local type",
+ funcSig: "func New(cfg CatalogerConfig) pkg.Cataloger { return nil }",
+ localPackageName: "python",
+ want: "python.CatalogerConfig",
+ },
+ {
+ name: "imported type",
+ funcSig: "func New(cfg java.ArchiveCatalogerConfig) pkg.Cataloger { return nil }",
+ localPackageName: "python",
+ want: "java.ArchiveCatalogerConfig",
+ },
+ {
+ name: "imported type - kernel package",
+ funcSig: "func New(cfg kernel.LinuxKernelCatalogerConfig) pkg.Cataloger { return nil }",
+ localPackageName: "other",
+ want: "kernel.LinuxKernelCatalogerConfig",
+ },
+ {
+ name: "no parameters",
+ funcSig: "func New() pkg.Cataloger { return nil }",
+ localPackageName: "python",
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // parse the function to get parameter type
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcSig, 0)
+ require.NoError(t, err)
+
+ // extract the function declaration
+ require.Len(t, file.Decls, 1)
+ funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
+ require.True(t, ok)
+
+ // get first parameter type
+ var paramType ast.Expr
+ if funcDecl.Type.Params != nil && len(funcDecl.Type.Params.List) > 0 {
+ paramType = funcDecl.Type.Params.List[0].Type
+ }
+
+ got := extractConfigTypeName(paramType, tt.localPackageName)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractReturnTypeName(t *testing.T) {
+ tests := []struct {
+ name string
+ funcDef string // complete function definition
+ want string
+ }{
+ {
+ name: "pointer to composite literal",
+ funcDef: `func New() pkg.Cataloger {
+ return &javaCataloger{name: "test"}
+ }`,
+ want: "javaCataloger",
+ },
+ {
+ name: "composite literal",
+ funcDef: `func New() pkg.Cataloger {
+ return pythonCataloger{name: "test"}
+ }`,
+ want: "pythonCataloger",
+ },
+ {
+ name: "variable return",
+ funcDef: `func New() pkg.Cataloger {
+ c := &Cataloger{}
+ return c
+ }`,
+ want: "",
+ },
+ {
+ name: "nil return",
+ funcDef: `func New() pkg.Cataloger {
+ return nil
+ }`,
+ want: "",
+ },
+ {
+ name: "empty function body",
+ funcDef: `func New() pkg.Cataloger {}`,
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // parse the function
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcDef, 0)
+ require.NoError(t, err)
+
+ // extract the function declaration
+ require.Len(t, file.Decls, 1)
+ funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
+ require.True(t, ok)
+
+ got := extractReturnTypeName(funcDecl)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
diff --git a/internal/capabilities/generate/cataloger_names.go b/internal/capabilities/generate/cataloger_names.go
index 9f1bfa432..85c31056a 100644
--- a/internal/capabilities/generate/cataloger_names.go
+++ b/internal/capabilities/generate/cataloger_names.go
@@ -1,3 +1,4 @@
+// this file retrieves the canonical list of cataloger names and their selectors from syft's task factories.
package main
import (
diff --git a/internal/capabilities/generate/completeness_test.go b/internal/capabilities/generate/completeness_test.go
index 193165359..ba11708d7 100644
--- a/internal/capabilities/generate/completeness_test.go
+++ b/internal/capabilities/generate/completeness_test.go
@@ -1,3 +1,4 @@
+// this file verifies the claims made in packages.yaml against test observations and source code, ensuring cataloger capabilities are accurate and complete.
package main
import (
@@ -344,8 +345,8 @@ func TestCatalogerDataQuality(t *testing.T) {
})
}
-// TestRegenerateCapabilitiesDoesNotFail verifies that regeneration runs successfully
-func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) {
+// TestCapabilitiesAreUpToDate verifies that regeneration runs successfully
+func TestCapabilitiesAreUpToDate(t *testing.T) {
if os.Getenv("CI") == "" {
t.Skip("skipping regeneration test in local environment")
}
@@ -366,9 +367,9 @@ func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) {
require.NoError(t, err, "packages.yaml has uncommitted changes after regeneration. Run 'go generate ./internal/capabilities' locally and commit the changes.")
}
-// TestAllCatalogersHaveObservations verifies that all catalogers have test observations,
+// TestCatalogersHaveTestObservations verifies that all catalogers have test observations,
// ensuring they are using the pkgtest helpers
-func TestAllCatalogersHaveObservations(t *testing.T) {
+func TestCatalogersHaveTestObservations(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
@@ -1303,3 +1304,70 @@ func TestCapabilityEvidenceFieldReferences(t *testing.T) {
})
}
}
+
+// TestDetectorConfigFieldReferences validates that config field names referenced in detector
+// conditions actually exist in the cataloger's config struct
+func TestDetectorConfigFieldReferences(t *testing.T) {
+ repoRoot, err := RepoRoot()
+ require.NoError(t, err)
+
+ // load the packages.yaml
+ doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
+ require.NoError(t, err)
+
+ // collect all validation errors before failing
+ var errors []string
+
+ // check each cataloger's detectors
+ for _, cataloger := range doc.Catalogers {
+ if cataloger.Type != "custom" {
+ continue // only custom catalogers have detectors
+ }
+
+ for detectorIdx, detector := range cataloger.Detectors {
+ // if detector has no conditions, skip validation
+ if len(detector.Conditions) == 0 {
+ continue
+ }
+
+ // detector has conditions - cataloger must have a config
+ if cataloger.Config == "" {
+ errors = append(errors,
+ fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct",
+ cataloger.Name, detectorIdx))
+ continue
+ }
+
+ // load the cataloger's config struct
+ configEntry, exists := doc.Configs[cataloger.Config]
+ if !exists {
+ errors = append(errors,
+ fmt.Sprintf("Cataloger %q references config %q which doesn't exist",
+ cataloger.Name, cataloger.Config))
+ continue
+ }
+
+ // build a set of valid config field names
+ validFields := make(map[string]bool)
+ for _, field := range configEntry.Fields {
+ validFields[field.Key] = true
+ }
+
+ // validate each condition
+ for condIdx, condition := range detector.Conditions {
+ for fieldName := range condition.When {
+ if !validFields[fieldName] {
+ errors = append(errors,
+ fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q",
+ cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config))
+ }
+ }
+ }
+ }
+ }
+
+ // report all errors at once
+ if len(errors) > 0 {
+ require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n"))
+ }
+}
diff --git a/internal/capabilities/generate/config_discovery_test.go b/internal/capabilities/generate/config_discovery_test.go
deleted file mode 100644
index 556df8727..000000000
--- a/internal/capabilities/generate/config_discovery_test.go
+++ /dev/null
@@ -1,133 +0,0 @@
-package main
-
-import (
- "testing"
-
- "github.com/google/go-cmp/cmp"
- "github.com/stretchr/testify/require"
-)
-
-func TestDiscoverConfigs(t *testing.T) {
- repoRoot, err := RepoRoot()
- require.NoError(t, err)
-
- configs, err := DiscoverConfigs(repoRoot)
- require.NoError(t, err)
-
- // verify we discovered multiple config structs
- require.NotEmpty(t, configs, "should discover at least one config struct")
-
- // check for known config structs that have app-config annotations
- expectedConfigs := []string{
- "golang.CatalogerConfig",
- "golang.MainModuleVersionConfig",
- "java.ArchiveCatalogerConfig",
- "python.CatalogerConfig",
- "dotnet.CatalogerConfig",
- "kernel.LinuxKernelCatalogerConfig",
- "javascript.CatalogerConfig",
- "nix.Config",
- }
-
- for _, expected := range expectedConfigs {
- config, ok := configs[expected]
- require.True(t, ok, "should discover config: %s", expected)
- require.NotEmpty(t, config.Fields, "config %s should have fields", expected)
- require.Equal(t, expected, config.PackageName+"."+config.StructName)
- }
-
- // verify golang.CatalogerConfig fields
- golangConfig := configs["golang.CatalogerConfig"]
- require.Equal(t, "golang", golangConfig.PackageName)
- require.Equal(t, "CatalogerConfig", golangConfig.StructName)
- require.NotEmpty(t, golangConfig.Fields)
-
- // check for specific field
- var foundSearchLocalModCache bool
- for _, field := range golangConfig.Fields {
- if field.Name == "SearchLocalModCacheLicenses" {
- foundSearchLocalModCache = true
- require.Equal(t, "bool", field.Type)
- require.Equal(t, "golang.search-local-mod-cache-licenses", field.AppKey)
- require.NotEmpty(t, field.Description)
- require.Contains(t, field.Description, "searching for go package licenses")
- }
- }
- require.True(t, foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field")
-
- // verify nested config struct
- golangMainModuleConfig := configs["golang.MainModuleVersionConfig"]
- require.Equal(t, "golang", golangMainModuleConfig.PackageName)
- require.Equal(t, "MainModuleVersionConfig", golangMainModuleConfig.StructName)
- require.NotEmpty(t, golangMainModuleConfig.Fields)
-
- // check for specific nested field
- var foundFromLDFlags bool
- for _, field := range golangMainModuleConfig.Fields {
- if field.Name == "FromLDFlags" {
- foundFromLDFlags = true
- require.Equal(t, "bool", field.Type)
- require.Equal(t, "golang.main-module-version.from-ld-flags", field.AppKey)
- require.NotEmpty(t, field.Description)
- }
- }
- require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig")
-
- // print summary for manual inspection
- t.Logf("Discovered %d config structs:", len(configs))
- for key, config := range configs {
- t.Logf(" %s: %d fields", key, len(config.Fields))
- for _, field := range config.Fields {
- t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey)
- if diff := cmp.Diff("", field.Description); diff == "" {
- t.Logf(" WARNING: field %s has no description", field.Name)
- }
- }
- }
-}
-
-func TestExtractPackageNameFromPath(t *testing.T) {
- tests := []struct {
- name string
- filePath string
- want string
- }{
- {
- name: "golang package",
- filePath: "syft/pkg/cataloger/golang/config.go",
- want: "golang",
- },
- {
- name: "java package",
- filePath: "syft/pkg/cataloger/java/config.go",
- want: "java",
- },
- {
- name: "python cataloger",
- filePath: "syft/pkg/cataloger/python/cataloger.go",
- want: "python",
- },
- {
- name: "kernel cataloger",
- filePath: "syft/pkg/cataloger/kernel/cataloger.go",
- want: "kernel",
- },
- {
- name: "binary classifier",
- filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go",
- want: "binary",
- },
- {
- name: "not a cataloger path",
- filePath: "syft/pkg/other/file.go",
- want: "",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- got := extractPackageNameFromPath(tt.filePath)
- require.Equal(t, tt.want, got)
- })
- }
-}
diff --git a/internal/capabilities/generate/detector_validation_test.go b/internal/capabilities/generate/detector_validation_test.go
deleted file mode 100644
index 63ce1a6cb..000000000
--- a/internal/capabilities/generate/detector_validation_test.go
+++ /dev/null
@@ -1,77 +0,0 @@
-package main
-
-import (
- "fmt"
- "path/filepath"
- "strings"
- "testing"
-
- "github.com/stretchr/testify/require"
-)
-
-// TestDetectorConfigFieldReferences validates that config field names referenced in detector
-// conditions actually exist in the cataloger's config struct
-func TestDetectorConfigFieldReferences(t *testing.T) {
- repoRoot, err := RepoRoot()
- require.NoError(t, err)
-
- // load the packages.yaml
- doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
- require.NoError(t, err)
-
- // collect all validation errors before failing
- var errors []string
-
- // check each cataloger's detectors
- for _, cataloger := range doc.Catalogers {
- if cataloger.Type != "custom" {
- continue // only custom catalogers have detectors
- }
-
- for detectorIdx, detector := range cataloger.Detectors {
- // if detector has no conditions, skip validation
- if len(detector.Conditions) == 0 {
- continue
- }
-
- // detector has conditions - cataloger must have a config
- if cataloger.Config == "" {
- errors = append(errors,
- fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct",
- cataloger.Name, detectorIdx))
- continue
- }
-
- // load the cataloger's config struct
- configEntry, exists := doc.Configs[cataloger.Config]
- if !exists {
- errors = append(errors,
- fmt.Sprintf("Cataloger %q references config %q which doesn't exist",
- cataloger.Name, cataloger.Config))
- continue
- }
-
- // build a set of valid config field names
- validFields := make(map[string]bool)
- for _, field := range configEntry.Fields {
- validFields[field.Key] = true
- }
-
- // validate each condition
- for condIdx, condition := range detector.Conditions {
- for fieldName := range condition.When {
- if !validFields[fieldName] {
- errors = append(errors,
- fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q",
- cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config))
- }
- }
- }
- }
- }
-
- // report all errors at once
- if len(errors) > 0 {
- require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n"))
- }
-}
diff --git a/internal/capabilities/generate/app_config_discovery.go b/internal/capabilities/generate/discover_app_config.go
similarity index 65%
rename from internal/capabilities/generate/app_config_discovery.go
rename to internal/capabilities/generate/discover_app_config.go
index 6a9c07e13..8c14b7a02 100644
--- a/internal/capabilities/generate/app_config_discovery.go
+++ b/internal/capabilities/generate/discover_app_config.go
@@ -1,3 +1,4 @@
+// this file discovers application-level configuration from cmd/syft/internal/options/ by parsing ecosystem config structs, their DescribeFields() methods, and default value functions.
package main
import (
@@ -5,6 +6,7 @@ import (
"go/ast"
"go/parser"
"go/token"
+ "os"
"path/filepath"
"reflect"
"sort"
@@ -18,30 +20,214 @@ type AppConfigField struct {
DefaultValue interface{} // extracted from Default*() functions
}
+// extractEcosystemConfigFieldsFromCatalog parses catalog.go and extracts the ecosystem-specific
+// config fields from the Catalog struct, returning a map of struct type name to YAML tag
+func extractEcosystemConfigFieldsFromCatalog(catalogFilePath string) (map[string]string, error) {
+ fset := token.NewFileSet()
+ f, err := parser.ParseFile(fset, catalogFilePath, nil, parser.ParseComments)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse catalog.go: %w", err)
+ }
+
+ // find the Catalog struct
+ catalogStruct := findConfigStruct(f, "Catalog")
+ if catalogStruct == nil {
+ return nil, fmt.Errorf("catalog struct not found in %s", catalogFilePath)
+ }
+
+ // extract ecosystem config fields from the Catalog struct
+ // these are between the "ecosystem-specific cataloger configuration" comment and the next section
+ ecosystemConfigs := make(map[string]string)
+ inEcosystemSection := false
+
+ for _, field := range catalogStruct.Fields.List {
+ // check for ecosystem section marker comment
+ if field.Doc != nil {
+ for _, comment := range field.Doc.List {
+ if strings.Contains(comment.Text, "ecosystem-specific cataloger configuration") {
+ inEcosystemSection = true
+ break
+ }
+				// check if we've hit the next section (a comment starting with "// configuration for")
+ if inEcosystemSection && strings.HasPrefix(comment.Text, "// configuration for") {
+ inEcosystemSection = false
+ break
+ }
+ }
+ }
+
+ if !inEcosystemSection {
+ continue
+ }
+
+ // extract field type and yaml tag
+ if len(field.Names) == 0 {
+ continue
+ }
+
+ // get the type name (e.g., "golangConfig")
+ var typeName string
+ if ident, ok := field.Type.(*ast.Ident); ok {
+ typeName = ident.Name
+ } else {
+ continue
+ }
+
+ // get the yaml tag
+ yamlTag := extractYAMLTag(field)
+ if yamlTag == "" || yamlTag == "-" {
+ continue
+ }
+
+ ecosystemConfigs[typeName] = yamlTag
+ }
+
+ return ecosystemConfigs, nil
+}
+
+// findFilesWithCatalogerImports scans the options directory for .go files that import
+// from "github.com/anchore/syft/syft/pkg/cataloger/*" packages
+func findFilesWithCatalogerImports(optionsDir string) ([]string, error) {
+ entries, err := os.ReadDir(optionsDir)
+ if err != nil {
+ return nil, fmt.Errorf("failed to read options directory: %w", err)
+ }
+
+ var candidateFiles []string
+ for _, entry := range entries {
+ if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".go") {
+ continue
+ }
+
+ filePath := filepath.Join(optionsDir, entry.Name())
+
+ // parse the file to check imports
+ fset := token.NewFileSet()
+ f, err := parser.ParseFile(fset, filePath, nil, parser.ImportsOnly)
+ if err != nil {
+ continue // skip files that can't be parsed
+ }
+
+ // check if file imports from cataloger packages
+ for _, imp := range f.Imports {
+ importPath := strings.Trim(imp.Path.Value, `"`)
+ if strings.HasPrefix(importPath, "github.com/anchore/syft/syft/pkg/cataloger/") {
+ candidateFiles = append(candidateFiles, filePath)
+ break
+ }
+ }
+ }
+
+ return candidateFiles, nil
+}
+
+// extractConfigStructTypes parses a Go file and returns all struct type names defined in it
+func extractConfigStructTypes(filePath string) ([]string, error) {
+ fset := token.NewFileSet()
+ f, err := parser.ParseFile(fset, filePath, nil, 0)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse %s: %w", filePath, err)
+ }
+
+ var structTypes []string
+ for _, decl := range f.Decls {
+ genDecl, ok := decl.(*ast.GenDecl)
+ if !ok || genDecl.Tok != token.TYPE {
+ continue
+ }
+
+ for _, spec := range genDecl.Specs {
+ typeSpec, ok := spec.(*ast.TypeSpec)
+ if !ok {
+ continue
+ }
+
+ // check if it's a struct type
+ if _, ok := typeSpec.Type.(*ast.StructType); ok {
+ structTypes = append(structTypes, typeSpec.Name.Name)
+ }
+ }
+ }
+
+ return structTypes, nil
+}
+
+// discoverCatalogerConfigs discovers cataloger config files by:
+// 1. Finding files with cataloger imports in options directory
+// 2. Extracting ecosystem config fields from Catalog struct
+// 3. Matching file structs against Catalog fields
+// Returns a map of file path to top-level YAML key
+func discoverCatalogerConfigs(repoRoot string) (map[string]string, error) {
+ optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options")
+ catalogFilePath := filepath.Join(optionsDir, "catalog.go")
+
+ // get ecosystem config fields from Catalog struct
+ ecosystemConfigs, err := extractEcosystemConfigFieldsFromCatalog(catalogFilePath)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(ecosystemConfigs) == 0 {
+ return nil, fmt.Errorf("no ecosystem config fields found in Catalog struct")
+ }
+
+ // find files with cataloger imports
+ candidateFiles, err := findFilesWithCatalogerImports(optionsDir)
+ if err != nil {
+ return nil, err
+ }
+
+ // match candidate files against Catalog ecosystem fields
+ fileToKey := make(map[string]string)
+ foundStructs := make(map[string]bool)
+
+ for _, filePath := range candidateFiles {
+ structTypes, err := extractConfigStructTypes(filePath)
+ if err != nil {
+ return nil, err
+ }
+
+ // check if any struct type matches an ecosystem config
+ for _, structType := range structTypes {
+ if yamlKey, exists := ecosystemConfigs[structType]; exists {
+ fileToKey[filePath] = yamlKey
+ foundStructs[structType] = true
+ break
+ }
+ }
+ }
+
+ // validate that all ecosystem configs were found
+ var missingConfigs []string
+ for structType := range ecosystemConfigs {
+ if !foundStructs[structType] {
+ missingConfigs = append(missingConfigs, structType)
+ }
+ }
+
+ if len(missingConfigs) > 0 {
+ sort.Strings(missingConfigs)
+ return nil, fmt.Errorf("could not find files for ecosystem configs: %s", strings.Join(missingConfigs, ", "))
+ }
+
+ return fileToKey, nil
+}
+
// DiscoverAppConfigs discovers all application-level cataloger configuration fields
// from the options package
func DiscoverAppConfigs(repoRoot string) ([]AppConfigField, error) {
- optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options")
-
- // parse all .go files in the options directory to extract configuration fields
- configs := []AppConfigField{}
-
- // define the config files we want to parse with their top-level keys
- configFiles := map[string]string{
- "dotnet.go": "dotnet",
- "golang.go": "golang",
- "java.go": "java",
- "javascript.go": "javascript",
- "linux_kernel.go": "linux-kernel",
- "nix.go": "nix",
- "python.go": "python",
+ // discover cataloger config files dynamically
+ configFiles, err := discoverCatalogerConfigs(repoRoot)
+ if err != nil {
+ return nil, fmt.Errorf("failed to discover cataloger configs: %w", err)
}
- for filename, topLevelKey := range configFiles {
- filePath := filepath.Join(optionsDir, filename)
+ // extract configuration fields from each discovered file
+ var configs []AppConfigField
+ for filePath, topLevelKey := range configFiles {
fields, err := extractAppConfigFields(filePath, topLevelKey)
if err != nil {
- return nil, fmt.Errorf("failed to extract config from %s: %w", filename, err)
+ return nil, fmt.Errorf("failed to extract config from %s: %w", filePath, err)
}
configs = append(configs, fields...)
}
diff --git a/internal/capabilities/generate/discover_app_config_test.go b/internal/capabilities/generate/discover_app_config_test.go
new file mode 100644
index 000000000..de353571f
--- /dev/null
+++ b/internal/capabilities/generate/discover_app_config_test.go
@@ -0,0 +1,413 @@
+package main
+
+import (
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestDetermineExpectedConfigName(t *testing.T) {
+ tests := []struct {
+ name string
+ topLevelKey string
+ wantName string
+ }{
+ {
+ name: "linux-kernel special case",
+ topLevelKey: "linux-kernel",
+ wantName: "linuxKernelConfig",
+ },
+ {
+ name: "javascript special case",
+ topLevelKey: "javascript",
+ wantName: "javaScriptConfig",
+ },
+ {
+ name: "standard config golang",
+ topLevelKey: "golang",
+ wantName: "golangConfig",
+ },
+ {
+ name: "standard config python",
+ topLevelKey: "python",
+ wantName: "pythonConfig",
+ },
+ {
+ name: "standard config java",
+ topLevelKey: "java",
+ wantName: "javaConfig",
+ },
+ {
+ name: "standard config dotnet",
+ topLevelKey: "dotnet",
+ wantName: "dotnetConfig",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := determineExpectedConfigName(tt.topLevelKey)
+ require.Equal(t, tt.wantName, got)
+ })
+ }
+}
+
+func TestCleanDescription(t *testing.T) {
+ tests := []struct {
+ name string
+ desc string
+ want string
+ }{
+ {
+ name: "single line no extra whitespace",
+ desc: "this is a description",
+ want: "this is a description",
+ },
+ {
+ name: "multiple spaces collapsed",
+ desc: "this has multiple spaces",
+ want: "this has multiple spaces",
+ },
+ {
+ name: "multi-line description",
+ desc: "this is a\nmulti-line\ndescription",
+ want: "this is a multi-line description",
+ },
+ {
+ name: "leading and trailing whitespace",
+ desc: " \t description with spaces \t ",
+ want: "description with spaces",
+ },
+ {
+ name: "tabs and newlines",
+ desc: "description\t\twith\n\ttabs",
+ want: "description with tabs",
+ },
+ {
+ name: "empty string",
+ desc: "",
+ want: "",
+ },
+ {
+ name: "only whitespace",
+ desc: " \n\t ",
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := cleanDescription(tt.desc)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractYAMLTag(t *testing.T) {
+ tests := []struct {
+ name string
+ tagStr string
+ want string
+ }{
+ {
+ name: "simple yaml tag",
+ tagStr: "`yaml:\"field-name\"`",
+ want: "field-name",
+ },
+ {
+ name: "yaml tag with omitempty",
+ tagStr: "`yaml:\"field-name,omitempty\"`",
+ want: "field-name",
+ },
+ {
+ name: "yaml tag with multiple options",
+ tagStr: "`yaml:\"field-name,omitempty,inline\"`",
+ want: "field-name",
+ },
+ {
+ name: "yaml tag dash means skip",
+ tagStr: "`yaml:\"-\"`",
+ want: "-",
+ },
+ {
+ name: "no yaml tag",
+ tagStr: "`json:\"field-name\"`",
+ want: "",
+ },
+ {
+ name: "empty tag",
+ tagStr: "",
+ want: "",
+ },
+ {
+ name: "yaml tag with json tag",
+ tagStr: "`yaml:\"yaml-name\" json:\"json-name\"`",
+ want: "yaml-name",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // construct a minimal ast.Field with the tag
+ field := &ast.Field{}
+ if tt.tagStr != "" {
+ field.Tag = &ast.BasicLit{
+ Kind: token.STRING,
+ Value: tt.tagStr,
+ }
+ }
+
+ got := extractYAMLTag(field)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestIsNestedStruct(t *testing.T) {
+ tests := []struct {
+ name string
+ expr ast.Expr
+ want bool
+ }{
+ {
+ name: "custom struct type",
+ expr: &ast.Ident{Name: "MainModuleVersion"},
+ want: true,
+ },
+ {
+ name: "string type",
+ expr: &ast.Ident{Name: "string"},
+ want: false,
+ },
+ {
+ name: "int type",
+ expr: &ast.Ident{Name: "int"},
+ want: false,
+ },
+ {
+ name: "bool type",
+ expr: &ast.Ident{Name: "bool"},
+ want: false,
+ },
+ {
+ name: "pointer type",
+ expr: &ast.StarExpr{X: &ast.Ident{Name: "Config"}},
+ want: false,
+ },
+ {
+ name: "array type",
+ expr: &ast.ArrayType{Elt: &ast.Ident{Name: "string"}},
+ want: false,
+ },
+ {
+ name: "map type",
+ expr: &ast.MapType{
+ Key: &ast.Ident{Name: "string"},
+ Value: &ast.Ident{Name: "string"},
+ },
+ want: false,
+ },
+ {
+ name: "int32 type",
+ expr: &ast.Ident{Name: "int32"},
+ want: false,
+ },
+ {
+ name: "uint64 type",
+ expr: &ast.Ident{Name: "uint64"},
+ want: false,
+ },
+ {
+ name: "float64 type",
+ expr: &ast.Ident{Name: "float64"},
+ want: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := isNestedStruct(tt.expr)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractStringLiteral(t *testing.T) {
+ tests := []struct {
+ name string
+ expr ast.Expr
+ want string
+ }{
+ {
+ name: "double quoted string",
+ expr: &ast.BasicLit{
+ Kind: token.STRING,
+ Value: `"hello world"`,
+ },
+ want: "hello world",
+ },
+ {
+ name: "backtick string",
+ expr: &ast.BasicLit{
+ Kind: token.STRING,
+ Value: "`hello world`",
+ },
+ want: "hello world",
+ },
+ {
+ name: "empty string",
+ expr: &ast.BasicLit{
+ Kind: token.STRING,
+ Value: `""`,
+ },
+ want: "",
+ },
+ {
+ name: "string with spaces",
+ expr: &ast.BasicLit{
+ Kind: token.STRING,
+ Value: `" spaces "`,
+ },
+ want: " spaces ",
+ },
+ {
+ name: "not a string literal (int)",
+ expr: &ast.BasicLit{
+ Kind: token.INT,
+ Value: "42",
+ },
+ want: "",
+ },
+ {
+ name: "not a basic lit",
+ expr: &ast.Ident{Name: "someVar"},
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := extractStringLiteral(tt.expr)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractFieldPathFromRef(t *testing.T) {
+ tests := []struct {
+ name string
+ src string
+ want string
+ }{
+ {
+ name: "simple field reference",
+ src: "&o.Field",
+ want: "Field",
+ },
+ {
+ name: "nested field reference",
+ src: "&o.Parent.Field",
+ want: "Parent.Field",
+ },
+ {
+ name: "deeply nested field reference",
+ src: "&o.MainModuleVersion.FromLDFlags",
+ want: "MainModuleVersion.FromLDFlags",
+ },
+ {
+ name: "three levels deep",
+ src: "&o.Level1.Level2.Level3",
+ want: "Level1.Level2.Level3",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // parse the expression
+ expr, err := parser.ParseExpr(tt.src)
+ require.NoError(t, err)
+
+ got := extractFieldPathFromRef(expr)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractAppValue(t *testing.T) {
+ tests := []struct {
+ name string
+ src string
+ want interface{}
+ }{
+ {
+ name: "string literal",
+ src: `"hello"`,
+ want: "hello",
+ },
+ {
+ name: "int literal",
+ src: "42",
+ want: "42",
+ },
+ {
+ name: "float literal",
+ src: "3.14",
+ want: "3.14",
+ },
+ {
+ name: "bool true",
+ src: "true",
+ want: true,
+ },
+ {
+ name: "bool false",
+ src: "false",
+ want: false,
+ },
+ {
+ name: "nil value",
+ src: "nil",
+ want: nil,
+ },
+ {
+ name: "empty string",
+ src: `""`,
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // parse the expression
+ expr, err := parser.ParseExpr(tt.src)
+ require.NoError(t, err)
+
+ got := extractAppValue(expr)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestExtractAppValue_NestedStruct(t *testing.T) {
+ // test nested struct separately since it returns a map
+ src := `struct{Field1 string; Field2 bool}{Field1: "value", Field2: true}`
+
+ // parse as a composite literal
+ expr, err := parser.ParseExpr(src)
+ require.NoError(t, err)
+
+ // extract the composite literal
+ compositeLit, ok := expr.(*ast.CompositeLit)
+ require.True(t, ok)
+
+ got := extractAppValue(compositeLit)
+
+ // verify it's a map with the expected values
+ gotMap, ok := got.(map[string]interface{})
+ require.True(t, ok)
+ require.Equal(t, "value", gotMap["Field1"])
+ require.Equal(t, true, gotMap["Field2"])
+}
diff --git a/internal/capabilities/generate/config_discovery.go b/internal/capabilities/generate/discover_cataloger_configs.go
similarity index 98%
rename from internal/capabilities/generate/config_discovery.go
rename to internal/capabilities/generate/discover_cataloger_configs.go
index 14747ba4b..f1d6edf2f 100644
--- a/internal/capabilities/generate/config_discovery.go
+++ b/internal/capabilities/generate/discover_cataloger_configs.go
@@ -1,3 +1,4 @@
+// this file discovers cataloger configuration structs using AST parsing to find Config structs and extract fields with app-config annotations.
package main
import (
@@ -135,9 +136,7 @@ func discoverConfigsInFile(path, repoRoot string) (map[string]ConfigInfo, error)
// isConfigStruct determines if a struct name looks like a configuration struct
func isConfigStruct(name string) bool {
// check for common config patterns
- return strings.Contains(name, "Config") ||
- strings.HasSuffix(name, "Config") ||
- strings.HasPrefix(name, "Config")
+ return strings.Contains(name, "Config")
}
// extractCatalogerConfigFields parses struct fields and extracts their metadata
diff --git a/internal/capabilities/generate/discover_cataloger_configs_test.go b/internal/capabilities/generate/discover_cataloger_configs_test.go
new file mode 100644
index 000000000..fadcae887
--- /dev/null
+++ b/internal/capabilities/generate/discover_cataloger_configs_test.go
@@ -0,0 +1,455 @@
+package main
+
+import (
+ "go/ast"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/stretchr/testify/require"
+)
+
+// expected config structs that should be discovered with app-config annotations
+var expectedCatalogConfigs = []string{
+ "golang.CatalogerConfig",
+ "golang.MainModuleVersionConfig",
+ "java.ArchiveCatalogerConfig",
+ "python.CatalogerConfig",
+ "dotnet.CatalogerConfig",
+ "kernel.LinuxKernelCatalogerConfig",
+ "javascript.CatalogerConfig",
+ "nix.Config",
+}
+
+func TestDiscoverConfigs(t *testing.T) {
+ repoRoot, err := RepoRoot()
+ require.NoError(t, err)
+
+ configs, err := DiscoverConfigs(repoRoot)
+ require.NoError(t, err)
+
+ // verify we discovered multiple config structs
+ require.NotEmpty(t, configs, "should discover at least one config struct")
+
+ // check for known config structs that have app-config annotations
+ for _, expected := range expectedCatalogConfigs {
+ config, ok := configs[expected]
+ require.True(t, ok, "should discover config: %s", expected)
+ require.NotEmpty(t, config.Fields, "config %s should have fields", expected)
+ require.Equal(t, expected, config.PackageName+"."+config.StructName)
+ }
+
+ // verify golang.CatalogerConfig structure
+ golangConfig := configs["golang.CatalogerConfig"]
+ wantGolangConfig := ConfigInfo{
+ PackageName: "golang",
+ StructName: "CatalogerConfig",
+ }
+ if diff := cmp.Diff(wantGolangConfig.PackageName, golangConfig.PackageName); diff != "" {
+ t.Errorf("golang.CatalogerConfig.PackageName mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantGolangConfig.StructName, golangConfig.StructName); diff != "" {
+ t.Errorf("golang.CatalogerConfig.StructName mismatch (-want +got):\n%s", diff)
+ }
+ require.NotEmpty(t, golangConfig.Fields)
+
+ // check for specific field
+ var foundSearchLocalModCache bool
+ for _, field := range golangConfig.Fields {
+ if field.Name == "SearchLocalModCacheLicenses" {
+ foundSearchLocalModCache = true
+ wantField := ConfigField{
+ Name: "SearchLocalModCacheLicenses",
+ Type: "bool",
+ AppKey: "golang.search-local-mod-cache-licenses",
+ }
+ if diff := cmp.Diff(wantField.Name, field.Name); diff != "" {
+ t.Errorf("SearchLocalModCacheLicenses field Name mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantField.Type, field.Type); diff != "" {
+ t.Errorf("SearchLocalModCacheLicenses field Type mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" {
+ t.Errorf("SearchLocalModCacheLicenses field AppKey mismatch (-want +got):\n%s", diff)
+ }
+ require.NotEmpty(t, field.Description)
+ require.Contains(t, field.Description, "searching for go package licenses")
+ }
+ }
+ require.True(t, foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field")
+
+ // verify nested config struct
+ golangMainModuleConfig := configs["golang.MainModuleVersionConfig"]
+ wantMainModuleConfig := ConfigInfo{
+ PackageName: "golang",
+ StructName: "MainModuleVersionConfig",
+ }
+ if diff := cmp.Diff(wantMainModuleConfig.PackageName, golangMainModuleConfig.PackageName); diff != "" {
+ t.Errorf("golang.MainModuleVersionConfig.PackageName mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantMainModuleConfig.StructName, golangMainModuleConfig.StructName); diff != "" {
+ t.Errorf("golang.MainModuleVersionConfig.StructName mismatch (-want +got):\n%s", diff)
+ }
+ require.NotEmpty(t, golangMainModuleConfig.Fields)
+
+ // check for specific nested field
+ var foundFromLDFlags bool
+ for _, field := range golangMainModuleConfig.Fields {
+ if field.Name == "FromLDFlags" {
+ foundFromLDFlags = true
+ wantField := ConfigField{
+ Name: "FromLDFlags",
+ Type: "bool",
+ AppKey: "golang.main-module-version.from-ld-flags",
+ }
+ if diff := cmp.Diff(wantField.Name, field.Name); diff != "" {
+ t.Errorf("FromLDFlags field Name mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantField.Type, field.Type); diff != "" {
+ t.Errorf("FromLDFlags field Type mismatch (-want +got):\n%s", diff)
+ }
+ if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" {
+ t.Errorf("FromLDFlags field AppKey mismatch (-want +got):\n%s", diff)
+ }
+ require.NotEmpty(t, field.Description)
+ }
+ }
+ require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig")
+
+ // print summary for manual inspection
+ t.Logf("Discovered %d config structs:", len(configs))
+ for key, config := range configs {
+ t.Logf(" %s: %d fields", key, len(config.Fields))
+ for _, field := range config.Fields {
+ t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey)
+ if diff := cmp.Diff("", field.Description); diff == "" {
+ t.Logf(" WARNING: field %s has no description", field.Name)
+ }
+ }
+ }
+}
+
+func TestExtractPackageNameFromPath(t *testing.T) {
+ tests := []struct {
+ name string
+ filePath string
+ want string
+ }{
+ {
+ name: "golang package",
+ filePath: "syft/pkg/cataloger/golang/config.go",
+ want: "golang",
+ },
+ {
+ name: "java package",
+ filePath: "syft/pkg/cataloger/java/config.go",
+ want: "java",
+ },
+ {
+ name: "python cataloger",
+ filePath: "syft/pkg/cataloger/python/cataloger.go",
+ want: "python",
+ },
+ {
+ name: "kernel cataloger",
+ filePath: "syft/pkg/cataloger/kernel/cataloger.go",
+ want: "kernel",
+ },
+ {
+ name: "binary classifier",
+ filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go",
+ want: "binary",
+ },
+ {
+ name: "not a cataloger path",
+ filePath: "syft/pkg/other/file.go",
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := extractPackageNameFromPath(tt.filePath)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestFormatFieldType(t *testing.T) {
+ tests := []struct {
+ name string
+ expr ast.Expr
+ want string
+ }{
+ {
+ name: "basic identifier - string",
+ expr: &ast.Ident{Name: "string"},
+ want: "string",
+ },
+ {
+ name: "basic identifier - bool",
+ expr: &ast.Ident{Name: "bool"},
+ want: "bool",
+ },
+ {
+ name: "basic identifier - int",
+ expr: &ast.Ident{Name: "int"},
+ want: "int",
+ },
+ {
+ name: "selector expression - package.Type",
+ expr: &ast.SelectorExpr{
+ X: &ast.Ident{Name: "time"},
+ Sel: &ast.Ident{Name: "Time"},
+ },
+ want: "time.Time",
+ },
+ {
+ name: "selector expression - cataloging.Config",
+ expr: &ast.SelectorExpr{
+ X: &ast.Ident{Name: "cataloging"},
+ Sel: &ast.Ident{Name: "ArchiveSearchConfig"},
+ },
+ want: "cataloging.ArchiveSearchConfig",
+ },
+ {
+ name: "array of strings",
+ expr: &ast.ArrayType{
+ Elt: &ast.Ident{Name: "string"},
+ },
+ want: "[]string",
+ },
+ {
+ name: "array of ints",
+ expr: &ast.ArrayType{
+ Elt: &ast.Ident{Name: "int"},
+ },
+ want: "[]int",
+ },
+ {
+ name: "map[string]bool",
+ expr: &ast.MapType{
+ Key: &ast.Ident{Name: "string"},
+ Value: &ast.Ident{Name: "bool"},
+ },
+ want: "map[string]bool",
+ },
+ {
+ name: "map[string]int",
+ expr: &ast.MapType{
+ Key: &ast.Ident{Name: "string"},
+ Value: &ast.Ident{Name: "int"},
+ },
+ want: "map[string]int",
+ },
+ {
+ name: "pointer to type",
+ expr: &ast.StarExpr{
+ X: &ast.Ident{Name: "Config"},
+ },
+ want: "*Config",
+ },
+ {
+ name: "pointer to selector",
+ expr: &ast.StarExpr{
+ X: &ast.SelectorExpr{
+ X: &ast.Ident{Name: "time"},
+ Sel: &ast.Ident{Name: "Time"},
+ },
+ },
+ want: "*time.Time",
+ },
+ {
+ name: "interface{}",
+ expr: &ast.InterfaceType{
+ Methods: &ast.FieldList{},
+ },
+ want: "interface{}",
+ },
+ {
+ name: "nested array of arrays",
+ expr: &ast.ArrayType{
+ Elt: &ast.ArrayType{
+ Elt: &ast.Ident{Name: "string"},
+ },
+ },
+ want: "[][]string",
+ },
+ {
+ name: "map with array value",
+ expr: &ast.MapType{
+ Key: &ast.Ident{Name: "string"},
+ Value: &ast.ArrayType{
+ Elt: &ast.Ident{Name: "int"},
+ },
+ },
+ want: "map[string][]int",
+ },
+ {
+ name: "pointer to array",
+ expr: &ast.StarExpr{
+ X: &ast.ArrayType{
+ Elt: &ast.Ident{Name: "string"},
+ },
+ },
+ want: "*[]string",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := formatFieldType(tt.expr)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+// TestExtractFieldComments verifies that extractFieldComments splits a struct
+// field's comment group into a human-readable description and an optional
+// "app-config:" annotation key. The cases cover nil/empty groups, either part
+// appearing alone, both orderings, multi-line descriptions, and whitespace
+// trimming on the annotation value.
+func TestExtractFieldComments(t *testing.T) {
+	tests := []struct {
+		name            string
+		commentGroup    *ast.CommentGroup
+		wantDescription string
+		wantAppKey      string
+	}{
+		{
+			name:            "nil comment group",
+			commentGroup:    nil,
+			wantDescription: "",
+			wantAppKey:      "",
+		},
+		{
+			name: "empty comment group",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{},
+			},
+			wantDescription: "",
+			wantAppKey:      "",
+		},
+		{
+			name: "app-config annotation only",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// app-config: golang.search-local-mod-cache-licenses"},
+				},
+			},
+			wantDescription: "",
+			wantAppKey:      "golang.search-local-mod-cache-licenses",
+		},
+		{
+			name: "description only",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// enable searching for go package licenses in the local mod cache"},
+				},
+			},
+			wantDescription: "enable searching for go package licenses in the local mod cache",
+			wantAppKey:      "",
+		},
+		{
+			name: "description and app-config",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// enable searching for go package licenses in the local mod cache"},
+					{Text: "// app-config: golang.search-local-mod-cache-licenses"},
+				},
+			},
+			wantDescription: "enable searching for go package licenses in the local mod cache",
+			wantAppKey:      "golang.search-local-mod-cache-licenses",
+		},
+		{
+			// annotation placement should not matter: the description is still collected
+			name: "app-config before description",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// app-config: golang.search-local-mod-cache-licenses"},
+					{Text: "// enable searching for go package licenses in the local mod cache"},
+				},
+			},
+			wantDescription: "enable searching for go package licenses in the local mod cache",
+			wantAppKey:      "golang.search-local-mod-cache-licenses",
+		},
+		{
+			// multiple description lines are joined with a single space
+			name: "multi-line description",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// this is the first line of the description."},
+					{Text: "// this is the second line of the description."},
+					{Text: "// app-config: test.multi-line"},
+				},
+			},
+			wantDescription: "this is the first line of the description. this is the second line of the description.",
+			wantAppKey:      "test.multi-line",
+		},
+		{
+			// trailing whitespace after the annotation value is trimmed
+			name: "app-config with extra whitespace",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// app-config: golang.test-key   "},
+				},
+			},
+			wantDescription: "",
+			wantAppKey:      "golang.test-key",
+		},
+		{
+			name: "description with special characters",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// enable searching for Go's package licenses (*.mod files)"},
+					{Text: "// app-config: golang.search"},
+				},
+			},
+			wantDescription: "enable searching for Go's package licenses (*.mod files)",
+			wantAppKey:      "golang.search",
+		},
+		{
+			// bare "//" lines are dropped rather than producing doubled spaces
+			name: "comment with empty lines",
+			commentGroup: &ast.CommentGroup{
+				List: []*ast.Comment{
+					{Text: "// first line"},
+					{Text: "//"},
+					{Text: "// second line"},
+					{Text: "// app-config: test.key"},
+				},
+			},
+			wantDescription: "first line second line",
+			wantAppKey:      "test.key",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotDescription, gotAppKey := extractFieldComments(tt.commentGroup)
+			require.Equal(t, tt.wantDescription, gotDescription)
+			require.Equal(t, tt.wantAppKey, gotAppKey)
+		})
+	}
+}
+
+// TestDiscoverAllowedConfigStructs is an integration-style test: it resolves
+// the real repository root and asserts that DiscoverAllowedConfigStructs
+// discovers a representative sample of known cataloger config types. It will
+// fail if run outside a checkout of the repository.
+func TestDiscoverAllowedConfigStructs(t *testing.T) {
+	repoRoot, err := RepoRoot()
+	require.NoError(t, err)
+
+	allowedConfigs, err := DiscoverAllowedConfigStructs(repoRoot)
+	require.NoError(t, err)
+
+	// verify we found multiple config types
+	require.NotEmpty(t, allowedConfigs, "should discover at least one allowed config type")
+
+	// verify specific config types that should be in pkgcataloging.Config
+	expectedConfigs := []string{
+		"golang.CatalogerConfig",
+		"java.ArchiveCatalogerConfig",
+		"python.CatalogerConfig",
+		"dotnet.CatalogerConfig",
+		"kernel.LinuxKernelCatalogerConfig",
+		"javascript.CatalogerConfig",
+	}
+
+	// note: this is a subset check, not an exhaustive list — new config types
+	// may be added without breaking this test
+	for _, expected := range expectedConfigs {
+		require.True(t, allowedConfigs[expected], "should find %s in allowed configs", expected)
+	}
+
+	// log all discovered configs for manual inspection
+	t.Logf("Discovered %d allowed config types:", len(allowedConfigs))
+	for configType := range allowedConfigs {
+		t.Logf("  - %s", configType)
+	}
+}
diff --git a/internal/capabilities/generate/discover.go b/internal/capabilities/generate/discover_catalogers.go
similarity index 98%
rename from internal/capabilities/generate/discover.go
rename to internal/capabilities/generate/discover_catalogers.go
index 111142133..f66618875 100644
--- a/internal/capabilities/generate/discover.go
+++ b/internal/capabilities/generate/discover_catalogers.go
@@ -1,3 +1,4 @@
+// this file discovers generic catalogers from source code by walking syft/pkg/cataloger/ and using AST parsing to find generic.NewCataloger() calls and extract parser information.
package main
import (
diff --git a/internal/capabilities/generate/discover_catalogers_test.go b/internal/capabilities/generate/discover_catalogers_test.go
new file mode 100644
index 000000000..0f0c5d605
--- /dev/null
+++ b/internal/capabilities/generate/discover_catalogers_test.go
@@ -0,0 +1,389 @@
+package main
+
+import (
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// test helper functions
+
+// parseFuncDecl parses a function declaration from a code string. The string
+// is wrapped in a synthetic "package test" clause so it can be parsed as a
+// complete file; the test fails if it does not yield exactly one *ast.FuncDecl.
+func parseFuncDecl(t *testing.T, code string) *ast.FuncDecl {
+	t.Helper()
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "", "package test\n"+code, 0)
+	require.NoError(t, err)
+	require.Len(t, file.Decls, 1, "expected exactly one declaration")
+	funcDecl, ok := file.Decls[0].(*ast.FuncDecl)
+	require.True(t, ok, "expected declaration to be a function")
+	return funcDecl
+}
+
+// parseCallExpr parses a call expression from a code string using
+// parser.ParseExpr (no package clause needed); the test fails if the result
+// is not an *ast.CallExpr.
+func parseCallExpr(t *testing.T, code string) *ast.CallExpr {
+	t.Helper()
+	expr, err := parser.ParseExpr(code)
+	require.NoError(t, err)
+	callExpr, ok := expr.(*ast.CallExpr)
+	require.True(t, ok, "expected expression to be a call expression")
+	return callExpr
+}
+
+// parseCompositeLit parses a composite literal (e.g. `[]string{"a"}`) from a
+// code string; the test fails if the result is not an *ast.CompositeLit.
+// NOTE(review): this helper is not referenced by any test in this file as it
+// stands — confirm it is needed or remove it.
+func parseCompositeLit(t *testing.T, code string) *ast.CompositeLit {
+	t.Helper()
+	expr, err := parser.ParseExpr(code)
+	require.NoError(t, err)
+	lit, ok := expr.(*ast.CompositeLit)
+	require.True(t, ok, "expected expression to be a composite literal")
+	return lit
+}
+
+// parseConstDecl parses a const (or var) declaration from a code string and
+// returns the resulting *ast.GenDecl. As with parseFuncDecl, the code is
+// wrapped in a synthetic "package test" clause.
+func parseConstDecl(t *testing.T, code string) *ast.GenDecl {
+	t.Helper()
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "", "package test\n"+code, 0)
+	require.NoError(t, err)
+	require.Len(t, file.Decls, 1, "expected exactly one declaration")
+	genDecl, ok := file.Decls[0].(*ast.GenDecl)
+	require.True(t, ok, "expected declaration to be a general declaration")
+	return genDecl
+}
+
+// TestReturnsPackageCataloger verifies that returnsPackageCataloger only
+// matches functions whose single return value is exactly `pkg.Cataloger` or
+// the bare `Cataloger` identifier — not pointers, not multi-value returns,
+// and not a Cataloger type from some other package.
+func TestReturnsPackageCataloger(t *testing.T) {
+	tests := []struct {
+		name string
+		code string
+		want bool
+	}{
+		{
+			name: "returns pkg.Cataloger",
+			code: `func NewFoo() pkg.Cataloger { return nil }`,
+			want: true,
+		},
+		{
+			name: "returns bare Cataloger",
+			code: `func NewFoo() Cataloger { return nil }`,
+			want: true,
+		},
+		{
+			// (Cataloger, error) pairs are intentionally not matched
+			name: "returns multiple values",
+			code: `func NewFoo() (pkg.Cataloger, error) { return nil, nil }`,
+			want: false,
+		},
+		{
+			name: "returns error",
+			code: `func NewFoo() error { return nil }`,
+			want: false,
+		},
+		{
+			name: "returns pointer to Cataloger",
+			code: `func NewFoo() *pkg.Cataloger { return nil }`,
+			want: false,
+		},
+		{
+			name: "returns string",
+			code: `func NewFoo() string { return "" }`,
+			want: false,
+		},
+		{
+			name: "no return type",
+			code: `func NewFoo() { }`,
+			want: false,
+		},
+		{
+			name: "returns wrong package Cataloger",
+			code: `func NewFoo() other.Cataloger { return nil }`,
+			want: false,
+		},
+		{
+			name: "returns pkg.OtherType",
+			code: `func NewFoo() pkg.OtherType { return nil }`,
+			want: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			funcDecl := parseFuncDecl(t, tt.code)
+			got := returnsPackageCataloger(funcDecl)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestIsGenericNewCatalogerCall verifies that isGenericNewCatalogerCall only
+// matches a direct `generic.NewCataloger(...)` call — both the package
+// qualifier and the function name must match, and the call must be the
+// top-level expression (a nested occurrence inside another call is rejected).
+func TestIsGenericNewCatalogerCall(t *testing.T) {
+	tests := []struct {
+		name string
+		code string
+		want bool
+	}{
+		{
+			name: "generic.NewCataloger call",
+			code: `generic.NewCataloger("foo")`,
+			want: true,
+		},
+		{
+			// argument count is irrelevant to the match
+			name: "generic.NewCataloger with no args",
+			code: `generic.NewCataloger()`,
+			want: true,
+		},
+		{
+			name: "other.NewCataloger call",
+			code: `other.NewCataloger("foo")`,
+			want: false,
+		},
+		{
+			name: "generic.OtherMethod call",
+			code: `generic.OtherMethod("foo")`,
+			want: false,
+		},
+		{
+			name: "bare NewCataloger call",
+			code: `NewCataloger("foo")`,
+			want: false,
+		},
+		{
+			// the outer call here is foo(...), so this does not match
+			name: "nested call",
+			code: `foo(generic.NewCataloger("bar"))`,
+			want: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			callExpr := parseCallExpr(t, tt.code)
+			got := isGenericNewCatalogerCall(callExpr)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestExtractStringSliceFromExpr verifies that extractStringSliceFromExpr
+// pulls the string elements out of a call whose first argument is a
+// []string composite literal (e.g. strset.New([]string{...})). Per the
+// "other.New" case, the callee's package/name is not checked — only the
+// argument shape matters. Empty slices and non-literal arguments yield nil.
+func TestExtractStringSliceFromExpr(t *testing.T) {
+	tests := []struct {
+		name string
+		code string
+		want []string
+	}{
+		{
+			name: "strset.New with strings",
+			code: `strset.New([]string{"foo", "bar", "baz"})`,
+			want: []string{"foo", "bar", "baz"},
+		},
+		{
+			name: "strset.New with single string",
+			code: `strset.New([]string{"single"})`,
+			want: []string{"single"},
+		},
+		{
+			name: "strset.New with empty slice",
+			code: `strset.New([]string{})`,
+			want: nil,
+		},
+		{
+			// the receiver package is not inspected, only the argument
+			name: "other.New with strings",
+			code: `other.New([]string{"x", "y"})`,
+			want: []string{"x", "y"},
+		},
+		{
+			name: "call with no args",
+			code: `strset.New()`,
+			want: nil,
+		},
+		{
+			name: "call with non-composite-literal arg",
+			code: `strset.New("not a slice")`,
+			want: nil,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			callExpr := parseCallExpr(t, tt.code)
+			got := extractStringSliceFromExpr(callExpr)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestSearchConstInDecl verifies that searchConstInDecl finds a named string
+// constant inside a GenDecl, covering single and grouped const blocks. It
+// returns "" when the name is missing, when the declaration is a var rather
+// than a const, or when the constant's value is not a string literal.
+func TestSearchConstInDecl(t *testing.T) {
+	tests := []struct {
+		name      string
+		code      string
+		constName string
+		want      string
+	}{
+		{
+			name:      "single const",
+			code:      `const Foo = "bar"`,
+			constName: "Foo",
+			want:      "bar",
+		},
+		{
+			name: "grouped consts - first",
+			code: `const (
+	Foo = "bar"
+	Baz = "qux"
+)`,
+			constName: "Foo",
+			want:      "bar",
+		},
+		{
+			name: "grouped consts - second",
+			code: `const (
+	Foo = "bar"
+	Baz = "qux"
+)`,
+			constName: "Baz",
+			want:      "qux",
+		},
+		{
+			name:      "const not found",
+			code:      `const Foo = "bar"`,
+			constName: "Missing",
+			want:      "",
+		},
+		{
+			// var declarations are ignored even when the name matches
+			name:      "var declaration instead of const",
+			code:      `var Foo = "bar"`,
+			constName: "Foo",
+			want:      "",
+		},
+		{
+			name:      "const with non-string value",
+			code:      `const Foo = 42`,
+			constName: "Foo",
+			want:      "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			genDecl := parseConstDecl(t, tt.code)
+			got := searchConstInDecl(genDecl, tt.constName)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestGetConstValue verifies getConstValue, which resolves a named string
+// constant from a single *ast.ValueSpec. Non-string literals and computed
+// expressions (e.g. Bar + "suffix") yield "", and multi-name specs
+// (`const Foo, Bar = ...`) resolve each name to its positional value.
+func TestGetConstValue(t *testing.T) {
+	tests := []struct {
+		name      string
+		code      string
+		constName string
+		want      string
+	}{
+		{
+			name:      "single const match",
+			code:      `const Foo = "bar"`,
+			constName: "Foo",
+			want:      "bar",
+		},
+		{
+			name:      "no match",
+			code:      `const Foo = "bar"`,
+			constName: "NotFoo",
+			want:      "",
+		},
+		{
+			name:      "non-string literal",
+			code:      `const Foo = 123`,
+			constName: "Foo",
+			want:      "",
+		},
+		{
+			// only plain string literals are resolved, not constant expressions
+			name:      "const with complex value",
+			code:      `const Foo = Bar + "suffix"`,
+			constName: "Foo",
+			want:      "",
+		},
+		{
+			name:      "first of multiple in same spec",
+			code:      `const Foo, Bar = "baz", "qux"`,
+			constName: "Foo",
+			want:      "baz",
+		},
+		{
+			name:      "second of multiple in same spec",
+			code:      `const Foo, Bar = "baz", "qux"`,
+			constName: "Bar",
+			want:      "qux",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			genDecl := parseConstDecl(t, tt.code)
+			require.Equal(t, token.CONST, genDecl.Tok)
+			require.NotEmpty(t, genDecl.Specs)
+
+			// getConstValue works on a single ValueSpec, so we need to find the right one
+			// in case of grouped constants, each const is its own spec
+			var got string
+			for _, spec := range genDecl.Specs {
+				valueSpec, ok := spec.(*ast.ValueSpec)
+				require.True(t, ok)
+
+				got = getConstValue(valueSpec, tt.constName)
+				if got != "" {
+					break
+				}
+			}
+
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestResolveImportPath verifies that resolveImportPath maps import paths
+// under the github.com/anchore/syft module onto directories below the given
+// repo root, and returns "" for anything else (external modules, the
+// standard library, or an empty path).
+func TestResolveImportPath(t *testing.T) {
+	const testRepoRoot = "/repo/root"
+
+	tests := []struct {
+		name       string
+		importPath string
+		want       string
+	}{
+		{
+			name:       "syft pkg cataloger golang",
+			importPath: "github.com/anchore/syft/syft/pkg/cataloger/golang",
+			want:       "/repo/root/syft/pkg/cataloger/golang",
+		},
+		{
+			name:       "syft internal capabilities",
+			importPath: "github.com/anchore/syft/internal/capabilities",
+			want:       "/repo/root/internal/capabilities",
+		},
+		{
+			name:       "syft root package",
+			importPath: "github.com/anchore/syft/syft",
+			want:       "/repo/root/syft",
+		},
+		{
+			name:       "external package",
+			importPath: "github.com/other/repo/pkg",
+			want:       "",
+		},
+		{
+			name:       "standard library",
+			importPath: "fmt",
+			want:       "",
+		},
+		{
+			name:       "empty import path",
+			importPath: "",
+			want:       "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := resolveImportPath(tt.importPath, testRepoRoot)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
diff --git a/internal/capabilities/generate/metadata_discovery.go b/internal/capabilities/generate/discover_metadata.go
similarity index 98%
rename from internal/capabilities/generate/metadata_discovery.go
rename to internal/capabilities/generate/discover_metadata.go
index 894501e69..759e6f7ce 100644
--- a/internal/capabilities/generate/metadata_discovery.go
+++ b/internal/capabilities/generate/discover_metadata.go
@@ -1,3 +1,4 @@
+// this file discovers metadata and package types by reading test-observations.json files generated by pkgtest helpers during test execution.
package main
import (
diff --git a/internal/capabilities/generate/discover_metadata_test.go b/internal/capabilities/generate/discover_metadata_test.go
new file mode 100644
index 000000000..26805943c
--- /dev/null
+++ b/internal/capabilities/generate/discover_metadata_test.go
@@ -0,0 +1,320 @@
+package main
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestApplyParserObservations verifies that applyParserObservations copies
+// per-parser test observations (keyed by package name + parser function) onto
+// the matching DiscoveredParser entries, and reports whether any data was
+// found at all. Parsers with no recorded observations are left untouched.
+func TestApplyParserObservations(t *testing.T) {
+	tests := []struct {
+		name             string
+		cataloger        DiscoveredCataloger
+		index            *TestObservationIndex
+		wantFoundData    bool
+		wantMetadataType string
+		wantPackageType  string
+	}{
+		{
+			name: "parser observations applied to matching parser",
+			cataloger: DiscoveredCataloger{
+				Name:        "test-cataloger",
+				PackageName: "testpkg",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseTestFile"},
+				},
+			},
+			index: func() *TestObservationIndex {
+				idx := newTestObservationIndex()
+				idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{
+					MetadataTypes: []string{"pkg.TestMetadata"},
+					PackageTypes:  []string{"test-type"},
+				})
+				return idx
+			}(),
+			wantFoundData:    true,
+			wantMetadataType: "pkg.TestMetadata",
+			wantPackageType:  "test-type",
+		},
+		{
+			// lookup is keyed by parser function name, so a different parser misses
+			name: "no observations found for parser",
+			cataloger: DiscoveredCataloger{
+				Name:        "test-cataloger",
+				PackageName: "testpkg",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseOtherFile"},
+				},
+			},
+			index: func() *TestObservationIndex {
+				idx := newTestObservationIndex()
+				idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{
+					MetadataTypes: []string{"pkg.TestMetadata"},
+				})
+				return idx
+			}(),
+			wantFoundData: false,
+		},
+		{
+			// any single match is enough for foundData to be true
+			name: "multiple parsers with mixed observations",
+			cataloger: DiscoveredCataloger{
+				Name:        "test-cataloger",
+				PackageName: "testpkg",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseFirst"},
+					{ParserFunction: "parseSecond"},
+				},
+			},
+			index: func() *TestObservationIndex {
+				idx := newTestObservationIndex()
+				idx.setParserObservations("testpkg", "parseFirst", &TypeObservation{
+					MetadataTypes: []string{"pkg.FirstMetadata"},
+				})
+				// parseSecond has no observations
+				return idx
+			}(),
+			wantFoundData:    true,
+			wantMetadataType: "pkg.FirstMetadata",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotFoundData := applyParserObservations(&tt.cataloger, tt.index)
+			require.Equal(t, tt.wantFoundData, gotFoundData)
+
+			if tt.wantFoundData && tt.wantMetadataType != "" {
+				require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, tt.wantMetadataType)
+			}
+
+			if tt.wantFoundData && tt.wantPackageType != "" {
+				require.Contains(t, tt.cataloger.Parsers[0].PackageTypes, tt.wantPackageType)
+			}
+		})
+	}
+}
+
+// TestApplySingleParserCatalogerObservations verifies that cataloger-level
+// observations are applied to a cataloger's sole parser: they are merged
+// (union) with any parser-level data already present, and an observation set
+// with no types reports that no data was found.
+func TestApplySingleParserCatalogerObservations(t *testing.T) {
+	tests := []struct {
+		name             string
+		cataloger        DiscoveredCataloger
+		catalogerObs     *TypeObservation
+		wantFoundData    bool
+		wantMetadataType []string
+		wantPackageType  []string
+	}{
+		{
+			name: "cataloger-level observations applied to single parser",
+			cataloger: DiscoveredCataloger{
+				Name: "single-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseSingle"},
+				},
+			},
+			catalogerObs: &TypeObservation{
+				MetadataTypes: []string{"pkg.CatalogerMetadata"},
+				PackageTypes:  []string{"cataloger-type"},
+			},
+			wantFoundData:    true,
+			wantMetadataType: []string{"pkg.CatalogerMetadata"},
+			wantPackageType:  []string{"cataloger-type"},
+		},
+		{
+			// existing parser-level data is kept and the cataloger-level data is unioned in
+			name: "cataloger-level merges with existing parser-level observations",
+			cataloger: DiscoveredCataloger{
+				Name: "single-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{
+						ParserFunction: "parseSingle",
+						MetadataTypes:  []string{"pkg.ParserMetadata"},
+						PackageTypes:   []string{"parser-type"},
+					},
+				},
+			},
+			catalogerObs: &TypeObservation{
+				MetadataTypes: []string{"pkg.CatalogerMetadata"},
+				PackageTypes:  []string{"cataloger-type"},
+			},
+			wantFoundData:    true,
+			wantMetadataType: []string{"pkg.CatalogerMetadata", "pkg.ParserMetadata"},
+			wantPackageType:  []string{"cataloger-type", "parser-type"},
+		},
+		{
+			name: "empty cataloger observations",
+			cataloger: DiscoveredCataloger{
+				Name: "single-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseSingle"},
+				},
+			},
+			catalogerObs:  &TypeObservation{},
+			wantFoundData: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotFoundData := applySingleParserCatalogerObservations(&tt.cataloger, tt.catalogerObs)
+			require.Equal(t, tt.wantFoundData, gotFoundData)
+
+			if tt.wantFoundData {
+				if len(tt.wantMetadataType) > 0 {
+					require.ElementsMatch(t, tt.wantMetadataType, tt.cataloger.Parsers[0].MetadataTypes)
+				}
+				if len(tt.wantPackageType) > 0 {
+					require.ElementsMatch(t, tt.wantPackageType, tt.cataloger.Parsers[0].PackageTypes)
+				}
+			}
+		})
+	}
+}
+
+// TestApplyMultiParserCatalogerObservations verifies the gap-filling policy
+// for catalogers with multiple parsers: cataloger-level observations are
+// applied only to parsers that have no data of their own, parsers that
+// already have data are never overwritten, and nothing is applied (foundData
+// is false) when every parser already has data or the observations are empty.
+func TestApplyMultiParserCatalogerObservations(t *testing.T) {
+	tests := []struct {
+		name         string
+		cataloger    DiscoveredCataloger
+		catalogerObs *TypeObservation
+		wantFoundData bool
+		// expectations for each parser by index
+		wantParser0HasMetadata bool
+		wantParser1HasMetadata bool
+	}{
+		{
+			name: "all parsers without data - cataloger-level applied to all",
+			cataloger: DiscoveredCataloger{
+				Name: "multi-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseFirst"},
+					{ParserFunction: "parseSecond"},
+				},
+			},
+			catalogerObs: &TypeObservation{
+				MetadataTypes: []string{"pkg.SharedMetadata"},
+				PackageTypes:  []string{"shared-type"},
+			},
+			wantFoundData:          true,
+			wantParser0HasMetadata: true,
+			wantParser1HasMetadata: true,
+		},
+		{
+			name: "some parsers have data - cataloger-level only fills gaps",
+			cataloger: DiscoveredCataloger{
+				Name: "multi-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{
+						ParserFunction: "parseFirst",
+						MetadataTypes:  []string{"pkg.FirstMetadata"},
+					},
+					{ParserFunction: "parseSecond"}, // no data
+				},
+			},
+			catalogerObs: &TypeObservation{
+				MetadataTypes: []string{"pkg.SharedMetadata"},
+			},
+			wantFoundData:          true,
+			wantParser0HasMetadata: false, // already has data, not overwritten
+			wantParser1HasMetadata: true,  // gets cataloger-level data
+		},
+		{
+			name: "all parsers have data - cataloger-level not applied",
+			cataloger: DiscoveredCataloger{
+				Name: "multi-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{
+						ParserFunction: "parseFirst",
+						MetadataTypes:  []string{"pkg.FirstMetadata"},
+					},
+					{
+						ParserFunction: "parseSecond",
+						MetadataTypes:  []string{"pkg.SecondMetadata"},
+					},
+				},
+			},
+			catalogerObs: &TypeObservation{
+				MetadataTypes: []string{"pkg.SharedMetadata"},
+			},
+			wantFoundData:          false,
+			wantParser0HasMetadata: false, // should not have shared metadata
+			wantParser1HasMetadata: false, // should not have shared metadata
+		},
+		{
+			name: "empty cataloger observations",
+			cataloger: DiscoveredCataloger{
+				Name: "multi-parser-cataloger",
+				Parsers: []DiscoveredParser{
+					{ParserFunction: "parseFirst"},
+					{ParserFunction: "parseSecond"},
+				},
+			},
+			catalogerObs:           &TypeObservation{},
+			wantFoundData:          false,
+			wantParser0HasMetadata: false,
+			wantParser1HasMetadata: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotFoundData := applyMultiParserCatalogerObservations(&tt.cataloger, tt.catalogerObs)
+			require.Equal(t, tt.wantFoundData, gotFoundData)
+
+			if tt.wantParser0HasMetadata {
+				require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata",
+					"parser 0 should have shared metadata")
+			} else if len(tt.catalogerObs.MetadataTypes) > 0 {
+				// if cataloger has metadata but we don't expect it in parser 0, verify it's not there
+				require.NotContains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata",
+					"parser 0 should not have shared metadata")
+			}
+
+			if tt.wantParser1HasMetadata {
+				require.Contains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata",
+					"parser 1 should have shared metadata")
+			} else if len(tt.catalogerObs.MetadataTypes) > 0 {
+				// if cataloger has metadata but we don't expect it in parser 1, verify it's not there
+				require.NotContains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata",
+					"parser 1 should not have shared metadata")
+			}
+		})
+	}
+}
+
+// TestMergeAndDeduplicateStrings verifies that mergeAndDeduplicateStrings
+// unions two string slices without duplicates. Order is not asserted
+// (ElementsMatch), so the implementation is free to sort or preserve
+// insertion order.
+func TestMergeAndDeduplicateStrings(t *testing.T) {
+	tests := []struct {
+		name       string
+		existing   []string
+		additional []string
+		want       []string
+	}{
+		{
+			name:       "merge with duplicates",
+			existing:   []string{"a", "b"},
+			additional: []string{"b", "c"},
+			want:       []string{"a", "b", "c"},
+		},
+		{
+			name:       "empty existing",
+			existing:   []string{},
+			additional: []string{"a", "b"},
+			want:       []string{"a", "b"},
+		},
+		{
+			name:       "empty additional",
+			existing:   []string{"a", "b"},
+			additional: []string{},
+			want:       []string{"a", "b"},
+		},
+		{
+			// ElementsMatch treats nil and empty slices the same here
+			name:       "both empty",
+			existing:   []string{},
+			additional: []string{},
+			want:       []string{},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := mergeAndDeduplicateStrings(tt.existing, tt.additional)
+			require.ElementsMatch(t, tt.want, got)
+		})
+	}
+}
diff --git a/internal/capabilities/generate/io.go b/internal/capabilities/generate/io.go
index 8b96e55f6..005c5ba0a 100644
--- a/internal/capabilities/generate/io.go
+++ b/internal/capabilities/generate/io.go
@@ -1,3 +1,4 @@
+// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration.
package main
import (
diff --git a/internal/capabilities/generate/io_test.go b/internal/capabilities/generate/io_test.go
new file mode 100644
index 000000000..8375647e9
--- /dev/null
+++ b/internal/capabilities/generate/io_test.go
@@ -0,0 +1,553 @@
+package main
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gopkg.in/yaml.v3"
+)
+
+// TestFindSectionNode verifies that findSectionNode locates a top-level key's
+// value node within a YAML mapping node, returning nil when the key is absent
+// or the mapping is empty. For scalar values the node's Value is also checked.
+func TestFindSectionNode(t *testing.T) {
+	tests := []struct {
+		name        string
+		yamlContent string
+		sectionName string
+		wantFound   bool
+		wantValue   string // expected value for scalar nodes
+	}{
+		{
+			name: "finds existing configs section",
+			yamlContent: `
+configs:
+  key: value
+catalogers:
+  - name: test
+`,
+			sectionName: "configs",
+			wantFound:   true,
+		},
+		{
+			name: "finds existing catalogers section",
+			yamlContent: `
+configs:
+  key: value
+catalogers:
+  - name: test
+`,
+			sectionName: "catalogers",
+			wantFound:   true,
+		},
+		{
+			name: "returns nil for non-existent section",
+			yamlContent: `
+configs:
+  key: value
+`,
+			sectionName: "nonexistent",
+			wantFound:   false,
+		},
+		{
+			name:        "handles empty mapping",
+			yamlContent: `{}`,
+			sectionName: "any",
+			wantFound:   false,
+		},
+		{
+			name: "finds section with scalar value",
+			yamlContent: `
+name: test-cataloger
+type: custom
+`,
+			sectionName: "name",
+			wantFound:   true,
+			wantValue:   "test-cataloger",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var rootNode yaml.Node
+			err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode)
+			require.NoError(t, err)
+
+			// get the mapping node (unwrap the document node yaml.v3 produces)
+			var mappingNode *yaml.Node
+			if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
+				mappingNode = rootNode.Content[0]
+			} else {
+				mappingNode = &rootNode
+			}
+
+			got := findSectionNode(mappingNode, tt.sectionName)
+
+			if tt.wantFound {
+				require.NotNil(t, got)
+				if tt.wantValue != "" {
+					require.Equal(t, tt.wantValue, got.Value)
+				}
+			} else {
+				require.Nil(t, got)
+			}
+		})
+	}
+}
+
+// TestFindFieldValue verifies that findFieldValue returns the scalar string
+// value of a named key in a YAML mapping, and "" when the key is missing or
+// the mapping is empty.
+func TestFindFieldValue(t *testing.T) {
+	tests := []struct {
+		name        string
+		yamlContent string
+		fieldName   string
+		want        string
+	}{
+		{
+			name: "finds simple string field",
+			yamlContent: `
+name: test-cataloger
+type: custom
+`,
+			fieldName: "name",
+			want:      "test-cataloger",
+		},
+		{
+			name: "finds type field",
+			yamlContent: `
+name: test-cataloger
+type: generic
+`,
+			fieldName: "type",
+			want:      "generic",
+		},
+		{
+			name: "returns empty for non-existent field",
+			yamlContent: `
+name: test-cataloger
+`,
+			fieldName: "nonexistent",
+			want:      "",
+		},
+		{
+			name: "finds parser_function field",
+			yamlContent: `
+parser_function: parseGoMod
+metadata_types: [GoModMetadata]
+`,
+			fieldName: "parser_function",
+			want:      "parseGoMod",
+		},
+		{
+			name:        "handles empty mapping",
+			yamlContent: `{}`,
+			fieldName:   "any",
+			want:        "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var rootNode yaml.Node
+			err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode)
+			require.NoError(t, err)
+
+			// get the mapping node (unwrap the document node yaml.v3 produces)
+			var mappingNode *yaml.Node
+			if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
+				mappingNode = rootNode.Content[0]
+			} else {
+				mappingNode = &rootNode
+			}
+
+			got := findFieldValue(mappingNode, tt.fieldName)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+// TestPreserveMappingNodeComments verifies that preserveMappingNodeComments
+// carries head/line comments from an existing mapping's key (and value) nodes
+// over to the corresponding keys of a regenerated mapping. Nodes are built by
+// hand because yaml.Unmarshal does not give precise control over comments.
+func TestPreserveMappingNodeComments(t *testing.T) {
+	tests := []struct {
+		name            string
+		checkField      string
+		wantHeadComment string
+		wantLineComment string
+	}{
+		{
+			name:            "preserves line comment on field",
+			checkField:      "name",
+			wantLineComment: "AUTO-GENERATED",
+		},
+		{
+			name:            "preserves head comment on field",
+			checkField:      "type",
+			wantHeadComment: "Important field",
+			wantLineComment: "AUTO-GENERATED",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// manually construct nodes with comments
+			existingMapping := &yaml.Node{
+				Kind: yaml.MappingNode,
+				Content: []*yaml.Node{
+					{Kind: yaml.ScalarNode, Value: "name", LineComment: "AUTO-GENERATED"},
+					{Kind: yaml.ScalarNode, Value: "test", HeadComment: "value comment"},
+					{Kind: yaml.ScalarNode, Value: "type", HeadComment: "Important field", LineComment: "AUTO-GENERATED"},
+					{Kind: yaml.ScalarNode, Value: "custom"},
+				},
+			}
+
+			// the new mapping has fresh values and no comments yet
+			newMapping := &yaml.Node{
+				Kind: yaml.MappingNode,
+				Content: []*yaml.Node{
+					{Kind: yaml.ScalarNode, Value: "name"},
+					{Kind: yaml.ScalarNode, Value: "test-new"},
+					{Kind: yaml.ScalarNode, Value: "type"},
+					{Kind: yaml.ScalarNode, Value: "generic"},
+				},
+			}
+
+			preserveMappingNodeComments(existingMapping, newMapping)
+
+			// find the field we're checking
+			keyNode, valueNode := findFieldNodes(newMapping, tt.checkField)
+			require.NotNil(t, keyNode, "field %s not found", tt.checkField)
+
+			// check comments were preserved
+			if tt.wantHeadComment != "" {
+				require.Equal(t, tt.wantHeadComment, keyNode.HeadComment)
+			}
+			if tt.wantLineComment != "" {
+				require.Equal(t, tt.wantLineComment, keyNode.LineComment)
+			}
+
+			// verify that value node comments are also preserved
+			if tt.checkField == "name" {
+				require.Equal(t, "value comment", valueNode.HeadComment)
+			}
+		})
+	}
+}
+
+// TestPreserveSequenceNodeComments exercises preserveSequenceNodeComments with
+// sequences of parser entries matched by parser_function. As written this is a
+// smoke test: it only asserts the call completes without panicking.
+// NOTE(review): the wantHeadComment field is declared but never asserted —
+// either assert the preserved comments or drop the field.
+func TestPreserveSequenceNodeComments(t *testing.T) {
+	tests := []struct {
+		name            string
+		existingYAML    string
+		newYAML         string
+		wantHeadComment string
+	}{
+		{
+			name: "preserves parser comments by parser_function",
+			existingYAML: `
+- parser_function: parseGoMod # old parser
+  metadata_types: [GoModMetadata]
+- parser_function: parseGoSum
+  metadata_types: [GoSumMetadata]
+`,
+			newYAML: `
+- parser_function: parseGoMod
+  metadata_types: [GoModMetadataNew]
+- parser_function: parseGoSum
+  metadata_types: [GoSumMetadataNew]
+`,
+			// we'll verify in the test body that comments are preserved
+		},
+		{
+			name: "handles new parsers not in existing",
+			existingYAML: `
+- parser_function: parseGoMod
+  metadata_types: [GoModMetadata]
+`,
+			newYAML: `
+- parser_function: parseGoMod
+  metadata_types: [GoModMetadata]
+- parser_function: parseGoSum
+  metadata_types: [GoSumMetadata]
+`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var existingNode, newNode yaml.Node
+			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
+			require.NoError(t, err)
+			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
+			require.NoError(t, err)
+
+			// get sequence nodes
+			existingSeq := getSequenceNode(&existingNode)
+			newSeq := getSequenceNode(&newNode)
+
+			preserveSequenceNodeComments(existingSeq, newSeq)
+
+			// verify that the function ran without panicking
+			require.NotNil(t, newSeq)
+		})
+	}
+}
+
+// TestPreserveFieldComments exercises preserveFieldComments against matching
+// mappings and against a kind mismatch (sequence vs mapping). As written this
+// is a smoke test: it only asserts the call completes without panicking.
+// NOTE(review): the wantPreserve field is declared but never read — either
+// assert the preserved comments or drop the field.
+func TestPreserveFieldComments(t *testing.T) {
+	tests := []struct {
+		name         string
+		existingYAML string
+		newYAML      string
+		wantPreserve bool
+	}{
+		{
+			name: "preserves mapping node comments",
+			existingYAML: `
+name: test # AUTO-GENERATED
+type: custom
+`,
+			newYAML: `
+name: test-new
+type: custom
+`,
+			wantPreserve: true,
+		},
+		{
+			// existing is a sequence, new is a mapping — must not panic
+			name: "handles kind mismatch gracefully",
+			existingYAML: `
+- item1
+- item2
+`,
+			newYAML: `
+name: test
+`,
+			wantPreserve: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var existingNode, newNode yaml.Node
+			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
+			require.NoError(t, err)
+			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
+			require.NoError(t, err)
+
+			existingContent := getContentNode(&existingNode)
+			newContent := getContentNode(&newNode)
+
+			preserveFieldComments(existingContent, newContent)
+
+			// verify the function completed without panicking
+			require.NotNil(t, newContent)
+		})
+	}
+}
+
+// TestUpdateOrAddSection verifies that updateOrAddSection replaces an existing
+// top-level section in the target mapping or appends it when missing; in both
+// cases the section must exist afterwards.
+// NOTE(review): the wantUpdated/wantAdded fields are declared but never read —
+// the test currently only checks presence, not which path was taken.
+func TestUpdateOrAddSection(t *testing.T) {
+	tests := []struct {
+		name         string
+		existingYAML string
+		newYAML      string
+		sectionName  string
+		wantUpdated  bool
+		wantAdded    bool
+	}{
+		{
+			name: "updates existing section",
+			existingYAML: `
+configs:
+  old: value
+catalogers:
+  - name: test
+`,
+			newYAML: `
+configs:
+  new: value
+`,
+			sectionName: "configs",
+			wantUpdated: true,
+		},
+		{
+			name: "adds new section",
+			existingYAML: `
+catalogers:
+  - name: test
+`,
+			newYAML: `
+configs:
+  new: value
+`,
+			sectionName: "configs",
+			wantAdded:   true,
+		},
+		{
+			name: "handles application section",
+			existingYAML: `
+catalogers:
+  - name: test
+`,
+			newYAML: `
+application:
+  key: value
+`,
+			sectionName: "application",
+			wantAdded:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var existingNode, newNode yaml.Node
+			err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode)
+			require.NoError(t, err)
+			err = yaml.Unmarshal([]byte(tt.newYAML), &newNode)
+			require.NoError(t, err)
+
+			existingMapping := getMappingNode(&existingNode)
+			newMapping := getMappingNode(&newNode)
+
+			updateOrAddSection(existingMapping, newMapping, tt.sectionName)
+
+			// verify the section exists in the result
+			resultSection := findSectionNode(existingMapping, tt.sectionName)
+			require.NotNil(t, resultSection, "section %s should exist after update", tt.sectionName)
+		})
+	}
+}
+
+// TestAddCatalogerFieldComment verifies the per-field line-comment policy of
+// addCatalogerFieldComment: auto-generated fields get the AUTO-GENERATED
+// marker, hand-maintained fields (ecosystem, capabilities, detectors for
+// non-binary catalogers) get MANUAL markers, and the detectors field switches
+// policy based on the cataloger name. fieldValue is left at its zero value in
+// every case — only the key node's comment is under test.
+func TestAddCatalogerFieldComment(t *testing.T) {
+	tests := []struct {
+		name            string
+		fieldName       string
+		fieldValue      string
+		catalogerName   string
+		wantLineComment string
+	}{
+		{
+			name:            "ecosystem is MANUAL",
+			fieldName:       "ecosystem",
+			catalogerName:   "test-cataloger",
+			wantLineComment: "MANUAL",
+		},
+		{
+			name:            "name is AUTO-GENERATED",
+			fieldName:       "name",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "type is AUTO-GENERATED",
+			fieldName:       "type",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "source is AUTO-GENERATED",
+			fieldName:       "source",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "config is AUTO-GENERATED",
+			fieldName:       "config",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "selectors is AUTO-GENERATED",
+			fieldName:       "selectors",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "parsers is AUTO-GENERATED structure",
+			fieldName:       "parsers",
+			catalogerName:   "test-cataloger",
+			wantLineComment: "AUTO-GENERATED structure",
+		},
+		{
+			// the binary classifier's detectors are generated, unlike all others
+			name:            "detectors for binary-classifier-cataloger is AUTO-GENERATED",
+			fieldName:       "detectors",
+			catalogerName:   "binary-classifier-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "detectors for other catalogers is MANUAL",
+			fieldName:       "detectors",
+			catalogerName:   "java-archive-cataloger",
+			wantLineComment: "MANUAL - edit detectors here",
+		},
+		{
+			name:            "metadata_types is AUTO-GENERATED",
+			fieldName:       "metadata_types",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "package_types is AUTO-GENERATED",
+			fieldName:       "package_types",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "json_schema_types is AUTO-GENERATED",
+			fieldName:       "json_schema_types",
+			catalogerName:   "test-cataloger",
+			wantLineComment: autoGeneratedComment,
+		},
+		{
+			name:            "capabilities is MANUAL",
+			fieldName:       "capabilities",
+			catalogerName:   "test-cataloger",
+			wantLineComment: "MANUAL - edit capabilities here",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// create key and value nodes
+			keyNode := &yaml.Node{
+				Kind:  yaml.ScalarNode,
+				Value: tt.fieldName,
+			}
+			valueNode := &yaml.Node{
+				Kind:  yaml.ScalarNode,
+				Value: tt.fieldValue,
+			}
+
+			addCatalogerFieldComment(keyNode, valueNode, tt.catalogerName)
+
+			require.Equal(t, tt.wantLineComment, keyNode.LineComment)
+		})
+	}
+}
+
+// helper functions
+
+// getMappingNode unwraps yaml.v3's top-level document node, returning the
+// first content node (the mapping) or the node itself if it is not a document.
+// NOTE(review): getMappingNode, getSequenceNode and getContentNode have
+// identical bodies — they are kept separate only for call-site readability.
+func getMappingNode(node *yaml.Node) *yaml.Node {
+	if node.Kind == yaml.DocumentNode && len(node.Content) > 0 {
+		return node.Content[0]
+	}
+	return node
+}
+
+// getSequenceNode unwraps yaml.v3's top-level document node, returning the
+// first content node (the sequence) or the node itself if it is not a document.
+func getSequenceNode(node *yaml.Node) *yaml.Node {
+	if node.Kind == yaml.DocumentNode && len(node.Content) > 0 {
+		return node.Content[0]
+	}
+	return node
+}
+
+// getContentNode unwraps yaml.v3's top-level document node, returning its
+// first content node regardless of kind, or the node itself otherwise.
+func getContentNode(node *yaml.Node) *yaml.Node {
+	if node.Kind == yaml.DocumentNode && len(node.Content) > 0 {
+		return node.Content[0]
+	}
+	return node
+}
+
+// findFieldNodes returns the key and value nodes for fieldName within a YAML
+// mapping node, or (nil, nil) if the node is not a mapping or the key is
+// absent. Mapping content alternates key/value, hence the step of 2.
+func findFieldNodes(mappingNode *yaml.Node, fieldName string) (*yaml.Node, *yaml.Node) {
+	if mappingNode.Kind != yaml.MappingNode {
+		return nil, nil
+	}
+
+	for i := 0; i < len(mappingNode.Content); i += 2 {
+		if mappingNode.Content[i].Value == fieldName {
+			return mappingNode.Content[i], mappingNode.Content[i+1]
+		}
+	}
+
+	return nil, nil
+}
diff --git a/internal/capabilities/generate/main.go b/internal/capabilities/generate/main.go
index 6c5ec8605..eed4fca9c 100644
--- a/internal/capabilities/generate/main.go
+++ b/internal/capabilities/generate/main.go
@@ -1,3 +1,4 @@
+// this is the entry point for regenerating the packages.yaml file; it orchestrates discovery, merging, and validation of cataloger capabilities.
package main
import (
@@ -19,25 +20,6 @@ var (
dimStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("245")) // lighter grey (256-color)
)
-func printSuccessASCII() {
- fmt.Println()
- fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉")
- fmt.Println()
- fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀"))
- fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█"))
- fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀"))
- fmt.Println()
-}
-
-func printFailureASCII() {
- fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢")
- fmt.Println()
- fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄"))
- fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█"))
- fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░"))
- fmt.Println()
-}
-
func main() {
repoRoot, err := RepoRoot()
if err != nil {
@@ -147,3 +129,22 @@ func hasEmptyCapabilities(caps capabilities.CapabilitySet) bool {
// if someone filled out the capabilities section (even with all false/empty values), that's intentional
return len(caps) == 0
}
+
+func printSuccessASCII() {
+ fmt.Println()
+ fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉")
+ fmt.Println()
+ fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀"))
+ fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█"))
+ fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀"))
+ fmt.Println()
+}
+
+func printFailureASCII() {
+ fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢")
+ fmt.Println()
+ fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄"))
+ fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█"))
+ fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░"))
+ fmt.Println()
+}
diff --git a/internal/capabilities/generate/merge.go b/internal/capabilities/generate/merge.go
index d17a32308..cc9cfe906 100644
--- a/internal/capabilities/generate/merge.go
+++ b/internal/capabilities/generate/merge.go
@@ -1,3 +1,4 @@
+// this file contains the core merging logic that combines discovered cataloger data with the existing packages.yaml, preserving all manual sections while updating auto-generated fields.
package main
import (
@@ -49,6 +50,74 @@ var catalogerConfigOverrides = map[string]string{
"nix-store-cataloger": "nix.Config",
}
+// ecosystemMapping maps patterns in cataloger names to ecosystem names.
+// order matters - more specific patterns should come first.
+type ecosystemMapping struct {
+ patterns []string // patterns to match in the cataloger name
+ ecosystem string // ecosystem to return if any pattern matches
+}
+
+// ecosystemMappings defines the pattern-to-ecosystem mappings.
+// note: order matters - check more specific patterns first
+var ecosystemMappings = []ecosystemMapping{
+ // language-based ecosystems
+ {[]string{"rust", "cargo"}, "rust"},
+ {[]string{"javascript", "node", "npm"}, "javascript"},
+ {[]string{"python"}, "python"},
+ {[]string{"java", "graalvm"}, "java"},
+ {[]string{"go-module", "golang"}, "go"},
+ {[]string{"ruby", "gem"}, "ruby"},
+ {[]string{"php", "composer", "pear", "pecl"}, "php"},
+ {[]string{"dotnet", ".net", "csharp"}, "dotnet"},
+ {[]string{"swift", "cocoapods"}, "swift"},
+ {[]string{"dart", "pubspec"}, "dart"},
+ {[]string{"elixir", "mix"}, "elixir"},
+ {[]string{"erlang", "rebar"}, "erlang"},
+ {[]string{"haskell", "cabal", "stack"}, "haskell"},
+ {[]string{"lua"}, "lua"},
+ {[]string{"ocaml", "opam"}, "ocaml"},
+ {[]string{"r-package"}, "r"},
+ {[]string{"swipl", "prolog"}, "prolog"},
+ {[]string{"cpp", "conan"}, "c++"},
+ {[]string{"kotlin"}, "kotlin"},
+
+ // os/distro-based ecosystems
+ {[]string{"apk", "alpine"}, "alpine"},
+ {[]string{"dpkg", "deb", "debian"}, "debian"},
+ {[]string{"rpm", "redhat"}, "rpm"},
+ {[]string{"alpm", "arch"}, "arch"},
+ {[]string{"portage", "gentoo"}, "gentoo"},
+ {[]string{"homebrew"}, "homebrew"},
+ {[]string{"snap"}, "snap"},
+
+ // other ecosystems
+ {[]string{"binary", "elf", "pe-binary"}, "binary"},
+ {[]string{"conda"}, "conda"},
+ {[]string{"nix"}, "nix"},
+ {[]string{"kernel"}, "linux"},
+ {[]string{"bitnami"}, "bitnami"},
+ {[]string{"terraform"}, "terraform"},
+ {[]string{"github"}, "github-actions"},
+ {[]string{"wordpress"}, "wordpress"},
+ {[]string{"sbom"}, "sbom"},
+}
+
+// inferEcosystem attempts to determine the ecosystem from a cataloger name
+func inferEcosystem(catalogerName string) string {
+ name := strings.ToLower(catalogerName)
+
+ for _, mapping := range ecosystemMappings {
+ for _, pattern := range mapping.patterns {
+ if strings.Contains(name, pattern) {
+ return mapping.ecosystem
+ }
+ }
+ }
+
+ // default
+ return "other"
+}
+
// Statistics contains information about the regeneration process
type Statistics struct {
TotalGenericCatalogers int
@@ -813,71 +882,3 @@ func formatOrphans(orphans []orphanInfo) string {
}
return strings.Join(lines, "\n")
}
-
-// ecosystemMapping maps patterns in cataloger names to ecosystem names.
-// order matters - more specific patterns should come first.
-type ecosystemMapping struct {
- patterns []string // patterns to match in the cataloger name
- ecosystem string // ecosystem to return if any pattern matches
-}
-
-// ecosystemMappings defines the pattern-to-ecosystem mappings.
-// note: order matters - check more specific patterns first
-var ecosystemMappings = []ecosystemMapping{
- // language-based ecosystems
- {[]string{"rust", "cargo"}, "rust"},
- {[]string{"javascript", "node", "npm"}, "javascript"},
- {[]string{"python"}, "python"},
- {[]string{"java", "graalvm"}, "java"},
- {[]string{"go-module", "golang"}, "go"},
- {[]string{"ruby", "gem"}, "ruby"},
- {[]string{"php", "composer", "pear", "pecl"}, "php"},
- {[]string{"dotnet", ".net", "csharp"}, "dotnet"},
- {[]string{"swift", "cocoapods"}, "swift"},
- {[]string{"dart", "pubspec"}, "dart"},
- {[]string{"elixir", "mix"}, "elixir"},
- {[]string{"erlang", "rebar"}, "erlang"},
- {[]string{"haskell", "cabal", "stack"}, "haskell"},
- {[]string{"lua"}, "lua"},
- {[]string{"ocaml", "opam"}, "ocaml"},
- {[]string{"r-package"}, "r"},
- {[]string{"swipl", "prolog"}, "prolog"},
- {[]string{"cpp", "conan"}, "c++"},
- {[]string{"kotlin"}, "kotlin"},
-
- // os/distro-based ecosystems
- {[]string{"apk", "alpine"}, "alpine"},
- {[]string{"dpkg", "deb", "debian"}, "debian"},
- {[]string{"rpm", "redhat"}, "rpm"},
- {[]string{"alpm", "arch"}, "arch"},
- {[]string{"portage", "gentoo"}, "gentoo"},
- {[]string{"homebrew"}, "homebrew"},
- {[]string{"snap"}, "snap"},
-
- // other ecosystems
- {[]string{"binary", "elf", "pe-binary"}, "binary"},
- {[]string{"conda"}, "conda"},
- {[]string{"nix"}, "nix"},
- {[]string{"kernel"}, "linux"},
- {[]string{"bitnami"}, "bitnami"},
- {[]string{"terraform"}, "terraform"},
- {[]string{"github"}, "github-actions"},
- {[]string{"wordpress"}, "wordpress"},
- {[]string{"sbom"}, "sbom"},
-}
-
-// inferEcosystem attempts to determine the ecosystem from a cataloger name
-func inferEcosystem(catalogerName string) string {
- name := strings.ToLower(catalogerName)
-
- for _, mapping := range ecosystemMappings {
- for _, pattern := range mapping.patterns {
- if strings.Contains(name, pattern) {
- return mapping.ecosystem
- }
- }
- }
-
- // default
- return "other"
-}
diff --git a/internal/capabilities/generate/merge_test.go b/internal/capabilities/generate/merge_test.go
index e0d8bc3fe..0ec32828e 100644
--- a/internal/capabilities/generate/merge_test.go
+++ b/internal/capabilities/generate/merge_test.go
@@ -374,3 +374,153 @@ func TestCatalogerConfigFieldUpdatedForNewCatalogers(t *testing.T) {
})
}
}
+
+func TestStripPURLVersion(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ want string
+ }{
+ {
+ name: "purl with version",
+ input: "pkg:generic/python@1.0.0",
+ want: "pkg:generic/python",
+ },
+ {
+ name: "purl without version",
+ input: "pkg:generic/python",
+ want: "pkg:generic/python",
+ },
+ {
+ name: "purl with multiple @ signs",
+ input: "pkg:generic/py@thon@1.0.0",
+ want: "pkg:generic/py@thon",
+ },
+ {
+ name: "empty string",
+ input: "",
+ want: "",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := stripPURLVersion(tt.input)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestInferEcosystem(t *testing.T) {
+ tests := []struct {
+ name string
+ catalogerName string
+ want string
+ }{
+ {
+ name: "go module cataloger",
+ catalogerName: "go-module-binary-cataloger",
+ want: "go",
+ },
+ {
+ name: "python cataloger",
+ catalogerName: "python-package-cataloger",
+ want: "python",
+ },
+ {
+ name: "java archive cataloger",
+ catalogerName: "java-archive-cataloger",
+ want: "java",
+ },
+ {
+ name: "rust cargo cataloger",
+ catalogerName: "rust-cargo-lock-cataloger",
+ want: "rust",
+ },
+ {
+ name: "javascript npm cataloger",
+ catalogerName: "javascript-package-cataloger",
+ want: "javascript",
+ },
+ {
+ name: "ruby gem cataloger",
+ catalogerName: "ruby-gemspec-cataloger",
+ want: "ruby",
+ },
+ {
+ name: "debian dpkg cataloger",
+ catalogerName: "dpkg-db-cataloger",
+ want: "debian",
+ },
+ {
+ name: "alpine apk cataloger",
+ catalogerName: "apk-db-cataloger",
+ want: "alpine",
+ },
+ {
+ name: "linux kernel cataloger",
+ catalogerName: "linux-kernel-cataloger",
+ want: "linux",
+ },
+ {
+ name: "binary classifier cataloger",
+ catalogerName: "binary-classifier-cataloger",
+ want: "binary",
+ },
+ {
+ name: "github actions cataloger",
+ catalogerName: "github-actions-usage-cataloger",
+ want: "github-actions",
+ },
+ {
+ name: "unknown cataloger defaults to other",
+ catalogerName: "unknown-custom-cataloger",
+ want: "other",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := inferEcosystem(tt.catalogerName)
+ require.Equal(t, tt.want, got)
+ })
+ }
+}
+
+func TestConvertToJSONSchemaTypesFromMetadata(t *testing.T) {
+ tests := []struct {
+ name string
+ metadataTypes []string
+ want []string
+ }{
+ {
+ name: "empty slice returns nil",
+ metadataTypes: []string{},
+ want: nil,
+ },
+ {
+ name: "nil slice returns nil",
+ metadataTypes: nil,
+ want: nil,
+ },
+ {
+ name: "single metadata type",
+ metadataTypes: []string{"pkg.AlpmDBEntry"},
+ want: []string{"AlpmDbEntry"},
+ },
+ {
+ name: "multiple metadata types",
+ metadataTypes: []string{"pkg.ApkDBEntry", "pkg.BinarySignature"},
+ want: []string{"ApkDbEntry", "BinarySignature"},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got := convertToJSONSchemaTypesFromMetadata(tt.metadataTypes)
+ if diff := cmp.Diff(tt.want, got); diff != "" {
+ t.Errorf("convertToJSONSchemaTypesFromMetadata() mismatch (-want +got):\n%s", diff)
+ }
+ })
+ }
+}
diff --git a/internal/capabilities/generate/metadata_check.go b/internal/capabilities/generate/metadata_check.go
index 42c75bf33..0eb60cc72 100644
--- a/internal/capabilities/generate/metadata_check.go
+++ b/internal/capabilities/generate/metadata_check.go
@@ -1,3 +1,4 @@
+// this file validates that all known metadata and package types are documented in packages.yaml by checking coverage and reporting any missing types.
package main
import (
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go
new file mode 100644
index 000000000..fd76c5cb0
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go
@@ -0,0 +1,21 @@
+package python
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+const catalogerName = "python-package-cataloger"
+
+type CatalogerConfig struct {
+ Setting string
+}
+
+func NewPythonCataloger(cfg CatalogerConfig) pkg.Cataloger {
+ return generic.NewCataloger(catalogerName).
+ WithParserByGlobs(parse, "**/*.py")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go
new file mode 100644
index 000000000..496172064
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go
@@ -0,0 +1,19 @@
+package duplicate
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+type Config1 struct {
+ Option1 bool
+}
+
+func NewDuplicateCataloger1(cfg Config1) pkg.Cataloger {
+ return generic.NewCataloger("duplicate-cataloger").
+ WithParserByGlobs(parse1, "**/*.txt")
+}
+
+func parse1(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go
new file mode 100644
index 000000000..0c563c99a
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go
@@ -0,0 +1,19 @@
+package duplicate
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+type Config2 struct {
+ Option2 string
+}
+
+func NewDuplicateCataloger2(cfg Config2) pkg.Cataloger {
+ return generic.NewCataloger("duplicate-cataloger").
+ WithParserByGlobs(parse2, "**/*.json")
+}
+
+func parse2(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go
new file mode 100644
index 000000000..78d934313
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go
@@ -0,0 +1,9 @@
+package dotnet
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+)
+
+func NewDotnetCataloger(cfg CatalogerConfig) pkg.Cataloger {
+ return dotnetCataloger{cfg: cfg}
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go
new file mode 100644
index 000000000..d32d318b7
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go
@@ -0,0 +1,23 @@
+package dotnet
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+)
+
+const catalogerName = "dotnet-cataloger"
+
+type CatalogerConfig struct {
+ Option bool
+}
+
+type dotnetCataloger struct {
+ cfg CatalogerConfig
+}
+
+func (d dotnetCataloger) Name() string {
+ return catalogerName
+}
+
+func (d dotnetCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go
new file mode 100644
index 000000000..a3530e6d7
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go
@@ -0,0 +1,27 @@
+package java
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+)
+
+const pomCatalogerName = "java-pom-cataloger"
+
+type ArchiveCatalogerConfig struct {
+ IncludeArchives bool
+}
+
+type pomXMLCataloger struct {
+ cfg ArchiveCatalogerConfig
+}
+
+func (p pomXMLCataloger) Name() string {
+ return pomCatalogerName
+}
+
+func (p pomXMLCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
+
+func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger {
+ return pomXMLCataloger{cfg: cfg}
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go
new file mode 100644
index 000000000..99b181854
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go
@@ -0,0 +1,15 @@
+package kernel
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) pkg.Cataloger {
+ return generic.NewCataloger("linux-kernel-cataloger").
+ WithParserByGlobs(parse, "**/vmlinuz")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go
new file mode 100644
index 000000000..c77d49bff
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go
@@ -0,0 +1,5 @@
+package kernel
+
+type LinuxKernelCatalogerConfig struct {
+ KernelVersion string
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go
new file mode 100644
index 000000000..cddb73dda
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go
@@ -0,0 +1,19 @@
+package ruby
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+type Config struct {
+ Setting bool
+}
+
+func NewRubyCataloger(opts Config) pkg.Cataloger {
+ return generic.NewCataloger("ruby-cataloger").
+ WithParserByGlobs(parse, "**/Gemfile")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go
new file mode 100644
index 000000000..c1668a8f9
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go
@@ -0,0 +1,15 @@
+package javascript
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+func NewJavaScriptCataloger() pkg.Cataloger {
+ return generic.NewCataloger("javascript-cataloger").
+ WithParserByGlobs(parse, "**/*.js")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go
new file mode 100644
index 000000000..f2b32e082
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go
@@ -0,0 +1,17 @@
+package binary
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+type Parser struct{}
+
+func NewBinaryCataloger(parser Parser) pkg.Cataloger {
+ return generic.NewCataloger("binary-cataloger").
+ WithParserByGlobs(parse, "**/*")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go
new file mode 100644
index 000000000..88a481e0c
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go
@@ -0,0 +1,16 @@
+package rust
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+ "github.com/test/cargo"
+)
+
+func NewRustCataloger(cfg cargo.CatalogerConfig) pkg.Cataloger {
+ return generic.NewCataloger("rust-cataloger").
+ WithParserByGlobs(parse, "**/Cargo.toml")
+}
+
+func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go
new file mode 100644
index 000000000..47a0d7c85
--- /dev/null
+++ b/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go
@@ -0,0 +1,19 @@
+package golang
+
+import (
+ "github.com/anchore/syft/syft/pkg"
+ "github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+type CatalogerConfig struct {
+ SomeOption bool
+}
+
+func NewGoModuleCataloger(cfg CatalogerConfig) pkg.Cataloger {
+ return generic.NewCataloger("go-module-cataloger").
+ WithParserByGlobs(parseGoMod, "**/go.mod")
+}
+
+func parseGoMod(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) {
+ return nil, nil, nil
+}
diff --git a/internal/capabilities/packages.yaml b/internal/capabilities/packages.yaml
index 3ecf6ad9f..f74b3639c 100644
--- a/internal/capabilities/packages.yaml
+++ b/internal/capabilities/packages.yaml
@@ -218,7 +218,6 @@ application: # AUTO-GENERATED - application-level config keys
description: enables Syft to use the network to fill in more detailed license information
- key: linux-kernel.catalog-modules
description: whether to catalog linux kernel modules found within lib/modules/** directories
- default: true
- key: nix.capture-owned-files
description: enumerate all files owned by packages found within Nix store paths
- key: python.guess-unpinned-requirements