diff --git a/DEVELOPING.md b/DEVELOPING.md index 8fd5b57f3..4ce1534dd 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -215,6 +215,24 @@ Interested in building a new cataloger? Checkout the [list of issues with the `n If you have questions about implementing a cataloger feel free to file an issue or reach out to us [on discourse](https://anchore.com/discourse)! +#### Documenting Cataloger Capabilities + +When adding a new cataloger or changing the capabilities of an existing one, you'll need to document its capabilities in `internal/capabilities/packages.yaml`. This includes: +- What metadata types it produces +- What package types it catalogs +- What dependency information it provides (depth, edges, kinds) +- Whether it extracts license information +- How configuration affects its behavior + +After implementing your cataloger: + +1. **Write tests using the `pkgtest` helpers** - this automatically generates test observations that feed into capability documentation +2. **Run `make generate-capabilities`** - this regenerates the `packages.yaml` file and validates your changes +3. **Manually edit capabilities** - add the `ecosystem` field and detailed `capabilities` sections in `packages.yaml` + +For detailed information about the capability documentation system, see [`internal/capabilities/generate/README.md`](internal/capabilities/generate/README.md). + + #### Searching for files All catalogers are provided an instance of the [`file.Resolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these diff --git a/Taskfile.yaml b/Taskfile.yaml index a4a1201b4..e2166b953 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -521,6 +521,8 @@ tasks: generate-capabilities: desc: Generate the capabilities data file cmds: + # this is required to update test observations; such evidence is used to update the packages.yaml + - "go test ./syft/pkg/..." 
- "go generate ./internal/capabilities/..." - "gofmt -s -w ./internal/capabilities" diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index 652d6a40d..342733d5c 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -77,6 +77,8 @@ func DefaultCatalog() Catalog { Package: defaultPackageConfig(), License: defaultLicenseConfig(), LinuxKernel: defaultLinuxKernelConfig(), + JavaScript: defaultJavaScriptConfig(), + Python: defaultPythonConfig(), Nix: defaultNixConfig(), Dotnet: defaultDotnetConfig(), Golang: defaultGolangConfig(), diff --git a/cmd/syft/internal/options/javascript.go b/cmd/syft/internal/options/javascript.go index 982bffa29..73019dc91 100644 --- a/cmd/syft/internal/options/javascript.go +++ b/cmd/syft/internal/options/javascript.go @@ -1,6 +1,9 @@ package options -import "github.com/anchore/clio" +import ( + "github.com/anchore/clio" + "github.com/anchore/syft/syft/pkg/cataloger/javascript" +) type javaScriptConfig struct { SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` @@ -12,6 +15,24 @@ var _ interface { clio.FieldDescriber } = (*javaScriptConfig)(nil) +func defaultJavaScriptConfig() javaScriptConfig { + def := javascript.DefaultCatalogerConfig() + var includeDevDependencies *bool + if def.IncludeDevDependencies { + includeDevDependencies = &def.IncludeDevDependencies + } + + var searchRemoteLicenses *bool + if def.SearchRemoteLicenses { + searchRemoteLicenses = &def.SearchRemoteLicenses + } + return javaScriptConfig{ + NpmBaseURL: def.NPMBaseURL, + SearchRemoteLicenses: searchRemoteLicenses, + IncludeDevDependencies: includeDevDependencies, + } +} + func (o *javaScriptConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`) descriptions.Add(&o.NpmBaseURL, `base NPM url 
to use`) diff --git a/cmd/syft/internal/options/linux_kernel.go b/cmd/syft/internal/options/linux_kernel.go index 03f24dbf0..fab464aa6 100644 --- a/cmd/syft/internal/options/linux_kernel.go +++ b/cmd/syft/internal/options/linux_kernel.go @@ -1,14 +1,18 @@ package options -import "github.com/anchore/clio" +import ( + "github.com/anchore/clio" + "github.com/anchore/syft/syft/pkg/cataloger/kernel" +) type linuxKernelConfig struct { CatalogModules bool `json:"catalog-modules" yaml:"catalog-modules" mapstructure:"catalog-modules"` } func defaultLinuxKernelConfig() linuxKernelConfig { + def := kernel.DefaultLinuxKernelCatalogerConfig() return linuxKernelConfig{ - CatalogModules: true, + CatalogModules: def.CatalogModules, } } diff --git a/cmd/syft/internal/options/python.go b/cmd/syft/internal/options/python.go index c645cbfcd..f18174ced 100644 --- a/cmd/syft/internal/options/python.go +++ b/cmd/syft/internal/options/python.go @@ -1,6 +1,9 @@ package options -import "github.com/anchore/clio" +import ( + "github.com/anchore/clio" + "github.com/anchore/syft/syft/pkg/cataloger/python" +) type pythonConfig struct { GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` @@ -10,6 +13,13 @@ var _ interface { clio.FieldDescriber } = (*pythonConfig)(nil) +func defaultPythonConfig() pythonConfig { + def := python.DefaultCatalogerConfig() + return pythonConfig{ + GuessUnpinnedRequirements: def.GuessUnpinnedRequirements, + } +} + func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version (e.g. 
"sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could diff --git a/internal/capabilities/capabilities.go b/internal/capabilities/capabilities.go index 7d1842070..9669e6536 100644 --- a/internal/capabilities/capabilities.go +++ b/internal/capabilities/capabilities.go @@ -1,3 +1,4 @@ +// Package capabilities provides discovery and tracking of cataloger capabilities. package capabilities import ( @@ -11,6 +12,8 @@ import ( "github.com/anchore/syft/internal/task" ) +//go:generate go run ./generate + //go:embed packages.yaml var catalogersYAML []byte diff --git a/internal/capabilities/doc.go b/internal/capabilities/doc.go deleted file mode 100644 index 289d9e655..000000000 --- a/internal/capabilities/doc.go +++ /dev/null @@ -1,11 +0,0 @@ -// Package capabilities provides discovery and tracking of cataloger capabilities. -// -// Run 'go generate' in this directory to discover catalogers from source code and update -// the packages.yaml file with newly discovered generic catalogers. -// -// The packages.yaml file is the source of truth for cataloger capabilities. It contains -// both auto-generated metadata (cataloger names, parser functions, glob patterns) and -// manually-edited capability descriptions (what each cataloger can discover). -package capabilities - -//go:generate go run ./generate diff --git a/internal/capabilities/generate/README.md b/internal/capabilities/generate/README.md new file mode 100644 index 000000000..57f85b4b5 --- /dev/null +++ b/internal/capabilities/generate/README.md @@ -0,0 +1,1382 @@ +# Capabilities Generation System + +This internal tool is responsible for: +- partially generating the `packages.yaml` file, which documents what capabilities each cataloger in syft has +- running completeness / consistency tests of the claims from `packages.yaml` against actual test observation + +Syft has dozens of catalogers across many ecosystems. 
Each cataloger has different capabilities, such as: +- Some provide license information, others don't +- Some detect transitive dependencies, others only direct +- Some capabilities depend on configuration + +The `packages.yaml` contains all of these capability claims. + +The `capabilities` generation system itself: +1. **Discovers** cataloger information from source code using AST parsing +2. **Extracts** metadata about parsers, detectors, and configuration from code and tests +3. **Merges** discovered information with manually-maintained capability documentation +4. **Validates** that the generated document is complete and in sync with the codebase + +**Why do this?** +The short answer is to provide a foundation for the OSS documentation, where the source of truth for facts about the capabilities of Syft can be derived from verifiable claims from the tool itself. + + +## Quick Start + +**To regenerate packages.yaml after code changes:** +```bash +go generate ./internal/capabilities +``` + +**To run validation of capability claims:** +```bash +# update test evidence +go test ./syft/pkg/... + +# check claims against test evidence +go test ./internal/capabilities/generate +``` + +## Data Flow + +```mermaid +graph TB + subgraph "Source Code Inputs" + A1[syft/pkg/cataloger/*/
cataloger.go] + A2[syft/pkg/cataloger/*/
config.go] + A3[cmd/syft/internal/options/
catalog.go, ecosystem.go] + A4[syft task factories
AllCatalogers] + end + + subgraph "Test Inputs" + B1[test-fixtures/
test-observations.json] + end + + subgraph "Discovery Processes" + C1[discover_catalogers.go
AST Parse Catalogers] + C2[discover_cataloger_configs.go
AST Parse Configs] + C3[discover_app_config.go
AST Parse App Configs] + C4[discover_metadata.go
Read Observations] + C5[cataloger_config_linking.go
Link Catalogers to Configs] + C6[cataloger_names.go
Query Task Factories] + end + + subgraph "Discovered Data" + D1[Generic Catalogers
name, parsers, detectors] + D2[Config Structs
fields, app-config keys] + D3[App Config Fields
keys, descriptions, defaults] + D4[Metadata Types
per parser/cataloger] + D5[Package Types
per parser/cataloger] + D6[Cataloger-Config Links
mapping] + D7[Selectors
tags per cataloger] + end + + subgraph "Configuration/Overrides" + E1[catalogerTypeOverrides
catalogerConfigOverrides
catalogerConfigExceptions] + E2[metadataTypeCoverageExceptions
packageTypeCoverageExceptions
observationExceptions] + end + + subgraph "Merge Process" + F1[io.go
Load Existing YAML] + F2[merge.go
Merge Logic] + F3[Preserve MANUAL fields
Update AUTO-GENERATED] + end + + subgraph "Output" + G1[packages.yaml
Complete Catalog Document] + end + + subgraph "Validation" + H1[completeness_test.go
Comprehensive Tests] + H2[metadata_check.go
Type Coverage] + end + + A1 --> C1 + A2 --> C2 + A3 --> C3 + A4 --> C6 + B1 --> C4 + + C1 --> D1 + C2 --> D2 + C3 --> D3 + C4 --> D4 + C4 --> D5 + C5 --> D6 + C6 --> D7 + + D1 --> F2 + D2 --> F2 + D3 --> F2 + D4 --> F2 + D5 --> F2 + D6 --> F2 + D7 --> F2 + + E1 -.configure.-> F2 + E2 -.configure.-> H1 + + F1 --> F3 + F2 --> F3 + F3 --> G1 + + G1 --> H1 + G1 --> H2 + + style D1 fill:#e1f5ff + style D2 fill:#e1f5ff + style D3 fill:#e1f5ff + style D4 fill:#e1f5ff + style D5 fill:#e1f5ff + style D6 fill:#e1f5ff + style D7 fill:#e1f5ff + style G1 fill:#c8e6c9 + style E1 fill:#fff9c4 + style E2 fill:#fff9c4 +``` + +### Key Data Flows + +1. **Cataloger Discovery**: AST parser walks `syft/pkg/cataloger/` to find `generic.NewCataloger()` calls and extract parser information +2. **Config Discovery**: AST parser finds config structs and extracts fields with `// app-config:` annotations +3. **App Config Discovery**: AST parser extracts ecosystem configurations from options package, including descriptions and defaults +4. **Metadata Discovery**: JSON reader loads test observations that record what metadata/package types each parser produces +5. **Linking**: AST analyzer connects catalogers to their config structs by examining constructor parameters +6. **Merge**: Discovered data combines with existing YAML, preserving all manually-maintained capability sections +7. **Validation**: Comprehensive tests ensure the output is complete and synchronized with codebase + +## The `packages.yaml` File + +### Purpose + +`internal/capabilities/packages.yaml` is the canonical documentation of: +- Every cataloger in syft +- What files/patterns each cataloger detects +- What metadata and package types each cataloger produces +- What capabilities each cataloger has (licenses, dependencies, etc.) 
+- How configuration affects these capabilities + +### Structure + +```yaml +# File header with usage instructions (AUTO-GENERATED) + +application: # AUTO-GENERATED + # Application-level config keys with descriptions + - key: golang.search-local-mod-cache-licenses + description: search for go package licences in the GOPATH... + default_value: false + +configs: # AUTO-GENERATED + # Config struct definitions + golang.CatalogerConfig: + fields: + - key: SearchLocalModCacheLicenses + description: SearchLocalModCacheLicenses enables... + app_key: golang.search-local-mod-cache-licenses + +catalogers: # Mixed AUTO-GENERATED structure, MANUAL capabilities + - ecosystem: golang # MANUAL + name: go-module-cataloger # AUTO-GENERATED + type: generic # AUTO-GENERATED + source: # AUTO-GENERATED + file: syft/pkg/cataloger/golang/cataloger.go + function: NewGoModuleBinaryCataloger + config: golang.CatalogerConfig # AUTO-GENERATED + selectors: [go, golang, ...] # AUTO-GENERATED + parsers: # AUTO-GENERATED structure + - function: parseGoMod # AUTO-GENERATED + detector: # AUTO-GENERATED + method: glob + criteria: ["**/go.mod"] + metadata_types: # AUTO-GENERATED + - pkg.GolangModuleEntry + package_types: # AUTO-GENERATED + - go-module + json_schema_types: # AUTO-GENERATED + - GolangModEntry + capabilities: # MANUAL - preserved across regeneration + - name: license + default: false + conditions: + - when: {SearchRemoteLicenses: true} + value: true + comment: fetches licenses from proxy.golang.org + - name: dependency.depth + default: [direct, indirect] + - name: dependency.edges + default: complete +``` + +### AUTO-GENERATED vs MANUAL Fields + +#### AUTO-GENERATED Fields +These are updated on every regeneration: + +**Cataloger Level:** +- `name` - cataloger identifier +- `type` - "generic" or "custom" +- `source.file` - source file path +- `source.function` - constructor function name +- `config` - linked config struct name +- `selectors` - tags from task factories + +**Parser Level 
(generic catalogers):** +- `function` - parser function name (as used in the generic cataloger) +- `detector.method` - glob/path/mimetype +- `detector.criteria` - patterns matched +- `metadata_types` - from test-observations.json +- `package_types` - from test-observations.json +- `json_schema_types` - converted from metadata_types + +**Custom Cataloger Level:** +- `metadata_types` - from test-observations.json +- `package_types` - from test-observations.json +- `json_schema_types` - converted from metadata_types + +**Sections:** +- Entire `application:` section: a flat mapping of the application config keys relevant to catalogers +- Entire `configs:` section: a flat mapping of the API-level cataloger config keys, for each cataloger (map of maps) + +#### MANUAL Fields +These are preserved across regeneration and must be edited by hand: + +- `ecosystem` - ecosystem/language identifier (cataloger level) +- `capabilities` - capability definitions with conditions +- `detectors` - for custom catalogers (except binary-classifier-cataloger) +- `conditions` on detectors - when detector is active based on config + +### How Regeneration Works + +When you run `go generate ./internal/capabilities`: + +1. **Loads existing YAML** into both a struct (for logic) and a node tree (for comment preservation) +2. **Discovers all cataloger data** from source code and tests +3. **Merges** discovered data with existing: + - Updates AUTO-GENERATED fields + - **Preserves** all MANUAL fields (capabilities, ecosystem, etc.) + - Adds annotations (`# AUTO-GENERATED`, `# MANUAL`) to field comments +4. **Writes back** using the node tree to preserve all comments +5. **Validates** the result with completeness tests + +> [!NOTE] +> Don't forget to update test observation evidence with `go test ./syft/pkg/...` before regeneration. + +## Generation Process + +### High-Level Workflow + +``` +1. 
Discovery Phase + ├─ Parse cataloger source code (AST) + ├─ Find all parsers and detectors + ├─ Read test observations for metadata types + ├─ Discover config structs and fields + ├─ Discover app-level configurations + └─ Link catalogers to their configs + +2. Merge Phase + ├─ Load existing packages.yaml + ├─ Process each cataloger: + │ ├─ Update AUTO-GENERATED fields + │ └─ Preserve MANUAL fields + ├─ Add new catalogers + └─ Detect orphaned entries + +3. Write Phase + ├─ Update YAML node tree in-place + ├─ Add field annotations + └─ Write to disk + +4. Validation Phase + ├─ Check all catalogers present + ├─ Check metadata/package type coverage + └─ Run completeness tests +``` + +### Detailed Discovery Processes + +#### 1. Generic Cataloger Discovery (`discover_catalogers.go`) + +**What it finds:** catalogers using the `generic.NewCataloger()` pattern + +**Process:** +1. Walk `syft/pkg/cataloger/` recursively for `.go` files +2. Parse each file with Go AST parser (`go/ast`, `go/parser`) +3. Find functions matching pattern: `New*Cataloger() pkg.Cataloger` +4. Within function body, find `generic.NewCataloger(name, ...)` call +5. Extract cataloger name from first argument +6. Find all chained `WithParserBy*()` calls: + ```go + generic.NewCataloger("my-cataloger"). + WithParserByGlobs(parseMyFormat, "**/*.myformat"). + WithParserByMimeTypes(parseMyBinary, "application/x-mytype") + ``` +7. For each parser call: + - Extract parser function name (e.g., `parseMyFormat`) + - Extract detection method (Globs/Path/MimeTypes) + - Extract criteria (patterns or mime types) + - Resolve constant references across files if needed + +**Output:** `map[string]DiscoveredCataloger` with full parser information + +#### 2. Config Discovery (`discover_cataloger_configs.go`) + +**What it finds:** cataloger configuration structs + +**Process:** +1. Find all `.go` files in `syft/pkg/cataloger/*/` +2. Look for structs with "Config" in their name +3. 
For each config struct: + - Extract struct fields + - Look for `// app-config: key.name` annotations in field comments + - Extract field descriptions from doc comments +4. Filter results by whitelist (only configs referenced in `pkgcataloging.Config`) + +**Example source:** +```go +type CatalogerConfig struct { + // SearchLocalModCacheLicenses enables searching for go package licenses + // in the local GOPATH mod cache. + // app-config: golang.search-local-mod-cache-licenses + SearchLocalModCacheLicenses bool +} +``` + +**Output:** `map[string]ConfigInfo` with field details and app-config keys + +#### 3. App Config Discovery (`discover_app_config.go`) + +**What it finds:** application-level configuration from the options package + +**Process:** +1. Parse `cmd/syft/internal/options/catalog.go` to find `Catalog` struct +2. Extract ecosystem config fields (e.g., `Golang golangConfig`) +3. For each ecosystem: + - Find the config file (e.g., `golang.go`) + - Parse the config struct + - Find `DescribeFields() []FieldDescription` method + - Extract field descriptions from the returned descriptions + - Find `default*Config()` function and extract default values +4. Build full key paths (e.g., `golang.search-local-mod-cache-licenses`) + +**Example source:** +```go +// golang.go +type golangConfig struct { + SearchLocalModCacheLicenses bool `yaml:"search-local-mod-cache-licenses" ...` +} + +func (c golangConfig) DescribeFields(opts ...options.DescribeFieldsOption) []options.FieldDescription { + return []options.FieldDescription{ + { + Name: "search-local-mod-cache-licenses", + Description: "search for go package licences in the GOPATH...", + }, + } +} +``` + +**Output:** `[]AppConfigField` with keys, descriptions, and defaults + +#### 4. Cataloger-Config Linking (`cataloger_config_linking.go`) + +**What it finds:** which config struct each cataloger uses + +**Process:** +1. For each discovered cataloger, find its constructor function +2. 
Extract the first parameter type from the function signature +3. Filter for types that look like configs (contain "Config") +4. Build mapping: cataloger name → config struct name +5. Apply manual overrides from `catalogerConfigOverrides` map +6. Apply exceptions from `catalogerConfigExceptions` set + +**Example:** +```go +// Constructor signature: +func NewGoModuleBinaryCataloger(cfg golang.CatalogerConfig) pkg.Cataloger + +// Results in link: +"go-module-binary-cataloger" → "golang.CatalogerConfig" +``` + +**Output:** `map[string]string` (cataloger → config mapping) + +#### 5. Metadata Discovery (`discover_metadata.go`) + +**What it finds:** metadata types and package types each parser produces + +**Process:** +1. Find all `test-fixtures/test-observations.json` files +2. Parse JSON which contains: + ```json + { + "package": "golang", + "parsers": { + "parseGoMod": { + "metadata_types": ["pkg.GolangModuleEntry"], + "package_types": ["go-module"] + } + }, + "catalogers": { + "linux-kernel-cataloger": { + "metadata_types": ["pkg.LinuxKernel"], + "package_types": ["linux-kernel"] + } + } + } + ``` +3. Build index by package name and parser function +4. Apply to discovered catalogers: + - Parser-level observations → attached to specific parsers + - Cataloger-level observations → for custom catalogers +5. Convert metadata types to JSON schema types using `packagemetadata` registry + +**Why this exists:** the AST parser can't determine what types a parser produces just by reading code. This information comes from test execution. + +**Output:** populated `MetadataTypes` and `PackageTypes` on catalogers/parsers + +## Input Sources + +### 1. 
Source Code Inputs + +#### Cataloger Constructors (`syft/pkg/cataloger/*/cataloger.go`) + +**What's extracted:** +- Cataloger names +- Parser function names +- Detection methods (glob, path, mimetype) +- Detection criteria (patterns) + +**Example:** +```go +func NewGoModuleBinaryCataloger() pkg.Cataloger { + return generic.NewCataloger("go-module-binary-cataloger"). + WithParserByGlobs(parseGoBin, "**/go.mod"). + WithParserByMimeTypes(parseGoArchive, "application/x-archive") +} +``` + +#### Config Structs (`syft/pkg/cataloger/*/config.go`) + +**What's extracted:** +- Config struct fields +- Field types +- Field descriptions from comments +- App-config key mappings from annotations + +**Example:** +```go +type CatalogerConfig struct { + // SearchRemoteLicenses enables downloading go package licenses from the upstream + // go proxy (typically proxy.golang.org). + // app-config: golang.search-remote-licenses + SearchRemoteLicenses bool + + // LocalModCacheDir specifies the location of the local go module cache directory. + // When not set, syft will attempt to discover the GOPATH env or default to $HOME/go. + // app-config: golang.local-mod-cache-dir + LocalModCacheDir string +} +``` + +#### Options Package (`cmd/syft/internal/options/`) + +**What's extracted:** +- Ecosystem config structs +- App-level configuration keys +- Field descriptions from `DescribeFields()` methods +- Default values from `default*Config()` functions + +**Example:** +```go +// catalog.go +type Catalog struct { + Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"` +} + +// golang.go +func (c golangConfig) DescribeFields(opts ...options.DescribeFieldsOption) []options.FieldDescription { + return []options.FieldDescription{ + { + Name: "search-remote-licenses", + Description: "search for go package licences by retrieving the package from a network proxy", + }, + } +} +``` + +### 2. 
Test-Driven Inputs + +#### test-observations.json Files + +**Location:** `syft/pkg/cataloger/*/test-fixtures/test-observations.json` + +**Purpose:** records what metadata and package types each parser produces during test execution + +**How they're generated:** automatically by the `pkgtest.CatalogTester` helpers when tests run + +**Example test code:** +```go +func TestGoModuleCataloger(t *testing.T) { + tester := NewGoModuleBinaryCataloger() + + pkgtest.NewCatalogTester(). + FromDirectory(t, "test-fixtures/go-module-fixture"). + TestCataloger(t, tester) // Auto-writes observations on first run +} +``` + +**Example observations file:** +```json +{ + "package": "golang", + "parsers": { + "parseGoMod": { + "metadata_types": ["pkg.GolangModuleEntry"], + "package_types": ["go-module"] + }, + "parseGoSum": { + "metadata_types": ["pkg.GolangModuleEntry"], + "package_types": ["go-module"] + } + } +} +``` + +**Why this exists:** +- Metadata types can't be determined from AST parsing alone +- Ensures tests use the pkgtest helpers (enforced by `TestAllCatalogers HaveObservations`) +- Provides test coverage visibility + +### 3. Syft Runtime Inputs + +#### Task Factories (`allPackageCatalogerInfo()`) + +**What's extracted:** +- Canonical list of all catalogers (ensures sync with binary) +- Selectors (tags) for each cataloger + +**Example:** +```go +info := cataloger.CatalogerInfo{ + Name: "go-module-binary-cataloger", + Selectors: []string{"go", "golang", "binary", "language", "package"}, +} +``` + +### 4. 
Global Configuration Variables + +#### Merge Logic Overrides (`merge.go`) + +```go +// catalogerTypeOverrides forces a specific cataloger type when discovery gets it wrong +var catalogerTypeOverrides = map[string]string{ + "java-archive-cataloger": "custom", // technically generic but treated as custom +} + +// catalogerConfigExceptions lists catalogers that should NOT have config linked +var catalogerConfigExceptions = strset.New( + "binary-classifier-cataloger", +) + +// catalogerConfigOverrides manually specifies config when linking fails +var catalogerConfigOverrides = map[string]string{ + "dotnet-portable-executable-cataloger": "dotnet.CatalogerConfig", + "nix-store-cataloger": "nix.Config", +} +``` + +**When to update:** +- Add to `catalogerTypeOverrides` when a cataloger's type is misdetected +- Add to `catalogerConfigExceptions` when a cataloger shouldn't have config +- Add to `catalogerConfigOverrides` when automatic config linking fails + +#### Completeness Test Configuration (`completeness_test.go`) + +```go +// requireParserObservations controls observation validation strictness +// - true: fail if ANY parser is missing observations (strict) +// - false: only check custom catalogers (lenient, current mode) +const requireParserObservations = false + +// metadataTypeCoverageExceptions lists metadata types allowed to not be documented +var metadataTypeCoverageExceptions = strset.New( + reflect.TypeOf(pkg.MicrosoftKbPatch{}).Name(), +) + +// packageTypeCoverageExceptions lists package types allowed to not be documented +var packageTypeCoverageExceptions = strset.New( + string(pkg.JenkinsPluginPkg), + string(pkg.KbPkg), +) + +// observationExceptions maps cataloger/parser names to observation types to skip +// - nil value: skip ALL observation checks for this cataloger/parser +// - set value: skip only specified observation types +var observationExceptions = map[string]*strset.Set{ + "graalvm-native-image-cataloger": nil, // skip all checks + 
"linux-kernel-cataloger": strset.New("relationships"), // skip only relationships +} +``` + +**When to update:** +- Add to exceptions when a type is intentionally not documented +- Add to `observationExceptions` when a cataloger lacks reliable test fixtures +- Set `requireParserObservations = true` when ready to enforce full parser coverage + +## Completeness Tests + +### Purpose + +The `completeness_test.go` file ensures `packages.yaml` stays in perfect sync with the codebase. These tests catch: +- New catalogers that haven't been documented +- Orphaned cataloger entries (cataloger was removed but YAML wasn't updated) +- Missing metadata/package type documentation +- Invalid capability field references +- Catalogers not using test helpers + +### Test Categories + +#### 1. Synchronization Tests + +**`TestCatalogersInSync`** +- Ensures all catalogers from `syft cataloger list` appear in YAML +- Ensures all catalogers in YAML exist in the binary +- Ensures all capabilities sections are filled (no TODOs/nulls) + +**Failure means:** you added/removed a cataloger but didn't regenerate packages.yaml + +**Fix:** run `go generate ./internal/capabilities` + +--- + +**`TestCapabilitiesAreUpToDate`** +- Runs only in CI +- Ensures regeneration succeeds +- Ensures generated file has no uncommitted changes + +**Failure means:** packages.yaml wasn't regenerated after code changes + +**Fix:** run `go generate ./internal/capabilities` and commit changes + +#### 2. 
Coverage Tests + +**`TestPackageTypeCoverage`** +- Ensures all types in `pkg.AllPkgs` are documented in some cataloger +- Allows exceptions via `packageTypeCoverageExceptions` + +**Failure means:** you added a new package type but no cataloger documents it + +**Fix:** either add a cataloger entry or add to exceptions if intentionally not supported + +--- + +**`TestMetadataTypeCoverage`** +- Ensures all types in `packagemetadata.AllTypes()` are documented +- Allows exceptions via `metadataTypeCoverageExceptions` + +**Failure means:** you added a new metadata type but no cataloger produces it + +**Fix:** either add metadata_types to a cataloger or add to exceptions + +--- + +**`TestMetadataTypesHaveJSONSchemaTypes`** +- Ensures metadata_types and json_schema_types are synchronized +- Validates every metadata type has a corresponding json_schema_type with correct conversion +- Checks both cataloger-level and parser-level types + +**Failure means:** metadata_types and json_schema_types are out of sync + +**Fix:** run `go generate ./internal/capabilities` to regenerate synchronized types + +#### 3. Structure Tests + +**`TestCatalogerStructure`** +- Validates generic vs custom cataloger structure rules: + - Generic catalogers must have parsers, no cataloger-level capabilities + - Custom catalogers must have detectors and cataloger-level capabilities +- Ensures ecosystem is always set + +**Failure means:** cataloger structure doesn't follow conventions + +**Fix:** correct the cataloger structure in packages.yaml + +--- + +**`TestCatalogerDataQuality`** +- Checks for duplicate cataloger names +- Validates detector formats for custom catalogers +- Checks for duplicate parser functions within catalogers + +**Failure means:** data integrity issue in packages.yaml + +**Fix:** remove duplicates or fix detector definitions + +#### 4. 
Config Tests + +**`TestConfigCompleteness`** +- Ensures all configs in the `configs:` section are referenced by a cataloger +- Ensures all cataloger config references exist +- Ensures all app-key references exist in `application:` section + +**Failure means:** orphaned config or broken reference + +**Fix:** remove unused configs or add missing entries + +--- + +**`TestAppConfigFieldsHaveDescriptions`** +- Ensures all application config fields have descriptions + +**Failure means:** missing `DescribeFields()` entry + +**Fix:** add description in the ecosystem's `DescribeFields()` method + +--- + +**`TestAppConfigKeyFormat`** +- Validates config keys follow format: `ecosystem.field-name` +- Ensures kebab-case (no underscores or spaces) + +**Failure means:** malformed config key + +**Fix:** rename the config key to follow conventions + +#### 5. Capability Tests + +**`TestCapabilityConfigFieldReferences`** +- Validates that config fields referenced in capability conditions actually exist +- Checks both cataloger-level and parser-level capabilities + +**Example failure:** +```yaml +capabilities: + - name: license + conditions: + - when: {NonExistentField: true} # ← this field doesn't exist in config struct + value: true +``` + +**Fix:** correct the field name to match the actual config struct + +--- + +**`TestCapabilityFieldNaming`** +- Ensures capability field names follow known patterns: + - `license` + - `dependency.depth` + - `dependency.edges` + - `dependency.kinds` + - `package_manager.files.listing` + - `package_manager.files.digests` + - `package_manager.package_integrity_hash` + +**Failure means:** typo in capability field name + +**Fix:** correct the typo or add new field to known list + +--- + +**`TestCapabilityValueTypes`** +- Validates capability values match expected types: + - Boolean fields: `license`, `package_manager.*` + - Array fields: `dependency.depth`, `dependency.kinds` + - String fields: `dependency.edges` + +**Example failure:** +```yaml 
+capabilities: + - name: license + default: "yes" # ← should be boolean true/false +``` + +**Fix:** use correct type for the field + +--- + +**`TestCapabilityEvidenceFieldReferences`** +- Validates that evidence references point to real struct fields +- Uses AST parsing to verify field paths exist + +**Example:** +```yaml +capabilities: + - name: package_manager.files.digests + default: true + evidence: + - AlpmDBEntry.Files[].Digests # ← validates this path exists +``` + +**Failure means:** typo in evidence reference or struct was changed + +**Fix:** correct the evidence reference or update after struct changes + +#### 6. Observations Test + +**`TestCatalogersHaveTestObservations`** +- Ensures all custom catalogers have test observations +- Optionally checks parsers (controlled by `requireParserObservations`) +- Allows exceptions via `observationExceptions` + +**Failure means:** cataloger tests aren't using pkgtest helpers + +**Fix:** update tests to use `pkgtest.CatalogTester`: +```go +pkgtest.NewCatalogTester(). + FromDirectory(t, "test-fixtures/my-fixture"). + TestCataloger(t, myCataloger) +``` + +### How to Fix Test Failures + +#### General Approach +1. **Read the test error message** - it usually tells you exactly what's wrong +2. **Check if regeneration needed** - most failures fixed by: `go generate ./internal/capabilities` +3. **Check for code/test changes** - did you add/modify a cataloger? +4. **Consider exceptions** - is this intentionally unsupported? 
+ +#### Common Failures and Fixes + +| Failure | Most Likely Cause | Fix | +|---------|------------------|-----| +| Cataloger not in YAML | Added new cataloger | Regenerate | +| Orphaned YAML entry | Removed cataloger | Regenerate | +| Missing metadata type | Added type but no test observations | Add pkgtest usage or exception | +| Missing observations | Test not using pkgtest | Update test to use `CatalogTester` | +| Config field reference | Typo in capability condition | Fix field name in YAML | +| Incomplete capabilities | Missing capability definition | Add capabilities section to YAML | + +## Manual Maintenance + +### What Requires Manual Editing + +these fields in `packages.yaml` are **MANUAL** and must be maintained by hand: + +#### 1. Ecosystem Field (Cataloger Level) +```yaml +catalogers: + - ecosystem: golang # MANUAL - identify the ecosystem +``` + +**Guidelines:** use the ecosystem/language name (golang, python, java, rust, etc.) + +#### 2. Capabilities Sections + +**For Generic Catalogers** (parser level): +```yaml +parsers: + - function: parseGoMod + capabilities: # MANUAL + - name: license + default: false + conditions: + - when: {SearchRemoteLicenses: true} + value: true + comment: fetches licenses from proxy.golang.org + - name: dependency.depth + default: [direct, indirect] + - name: dependency.edges + default: complete +``` + +**For Custom Catalogers** (cataloger level): +```yaml +catalogers: + - name: linux-kernel-cataloger + type: custom + capabilities: # MANUAL + - name: license + default: true +``` + +#### 3. Detectors for Custom Catalogers + +**For most custom catalogers:** +```yaml +detectors: # MANUAL + - method: glob + criteria: + - '**/lib/modules/**/modules.builtin' + comment: kernel modules directory +``` + +**Exception:** `binary-classifier-cataloger` has AUTO-GENERATED detectors extracted from source + +#### 4. 
Detector Conditions + +when a detector should only be active with certain configuration: +```yaml +detectors: + - method: glob + criteria: ['**/*.zip'] + conditions: # MANUAL + - when: {IncludeZipFiles: true} + comment: ZIP detection requires explicit config +``` + +### Capabilities Format and Guidelines + +#### Standard Capability Fields + +**Boolean Fields:** +```yaml +- name: license + default: true # always available + # OR + default: false # never available + # OR + default: false + conditions: + - when: {SearchRemoteLicenses: true} + value: true + comment: requires network access to fetch licenses +``` + +**Array Fields (dependency.depth):** +```yaml +- name: dependency.depth + default: [direct] # only immediate dependencies + # OR + default: [direct, indirect] # full transitive closure + # OR + default: [] # no dependency information +``` + +**String Fields (dependency.edges):** +```yaml +- name: dependency.edges + default: "" # dependencies found but no edges between them + # OR + default: flat # single level of dependencies with edges to root only + # OR + default: reduced # transitive reduction (redundant edges removed) + # OR + default: complete # all relationships with accurate direct/indirect edges +``` + +**Array Fields (dependency.kinds):** +```yaml +- name: dependency.kinds + default: [runtime] # production dependencies only + # OR + default: [runtime, dev] # production and development + # OR + default: [runtime, dev, build, test] # all dependency types +``` + +#### Using Conditions + +Conditions allow capabilities to vary based on configuration values: + +```yaml +capabilities: + - name: license + default: false + conditions: + - when: {SearchLocalModCacheLicenses: true} + value: true + comment: searches for licenses in GOPATH mod cache + - when: {SearchRemoteLicenses: true} + value: true + comment: fetches licenses from proxy.golang.org + comment: license scanning requires configuration +``` + +**Rules:** +- Conditions are evaluated in array order 
(first match wins) +- Multiple fields WITHIN a `when` clause use AND logic (all must match) +- Multiple conditions in the array use OR logic (first matching condition) +- If no conditions match, the `default` value is used + +#### Adding Evidence + +evidence documents which struct fields provide the capability: + +```yaml +- name: package_manager.files.listing + default: true + evidence: + - AlpmDBEntry.Files + comment: file listings stored in Files array +``` + +**For nested fields:** +```yaml +evidence: + - CondaMetaPackage.PathsData.Paths +``` + +**For array element fields:** +```yaml +evidence: + - AlpmDBEntry.Files[].Digests +``` + +### Best Practices + +1. **Be specific in comments:** explain WHY, not just WHAT +2. **Document conditions clearly:** explain what configuration enables the capability +3. **Use evidence references:** helps verify capabilities are accurate +4. **Test after edits:** run `go test ./internal/capabilities/generate` to validate + +## Development Workflows + +### Adding a New Cataloger + +#### If Using `generic.NewCataloger()`: + +**What happens automatically:** +1. Generator discovers the cataloger via AST parsing +2. Extracts parsers, detectors, and patterns +3. Adds entry to packages.yaml with structure +4. Links to config (if constructor has config parameter) +5. Extracts metadata types from test-observations.json (if test uses pkgtest) + +**What you must do manually:** +1. Set the `ecosystem` field in packages.yaml +2. Add `capabilities` sections to each parser +3. Run `go generate ./internal/capabilities` +4. Commit the updated packages.yaml + +**Example workflow:** +```bash +# 1. Write cataloger code +vim syft/pkg/cataloger/mynew/cataloger.go + +# 2. Write tests using pkgtest (generates observations) +vim syft/pkg/cataloger/mynew/cataloger_test.go + +# 3. Run tests to generate observations +go test ./syft/pkg/cataloger/mynew + +# 4. Regenerate packages.yaml +go generate ./internal/capabilities + +# 5. 
Edit packages.yaml manually +vim internal/capabilities/packages.yaml +# - Set ecosystem field +# - Add capabilities sections + +# 6. Validate +go test ./internal/capabilities/generate + +# 7. Commit +git add internal/capabilities/packages.yaml +git add syft/pkg/cataloger/mynew/test-fixtures/test-observations.json +git commit +``` + +#### If Writing a Custom Cataloger: + +**What happens automatically:** +1. Generator creates entry with name and type +2. Extracts metadata types from test-observations.json + +**What you must do manually:** +1. Set `ecosystem` +2. Add `detectors` array with detection methods +3. Add `capabilities` section (cataloger level, not parser level) +4. Run `go generate ./internal/capabilities` + +### Modifying an Existing Cataloger + +#### If Changing Parser Detection Patterns: + +**Impact:** AUTO-GENERATED field, automatically updated + +**Workflow:** +```bash +# 1. Change the code +vim syft/pkg/cataloger/something/cataloger.go + +# 2. Regenerate +go generate ./internal/capabilities + +# 3. Review changes +git diff internal/capabilities/packages.yaml + +# 4. Commit +git add internal/capabilities/packages.yaml +git commit +``` + +#### If Changing Metadata Type: + +**Impact:** AUTO-GENERATED field, updated via test observations + +**Workflow:** +```bash +# 1. Change the code +vim syft/pkg/cataloger/something/parser.go + +# 2. Update tests (if needed) +vim syft/pkg/cataloger/something/parser_test.go + +# 3. Run tests to update observations +go test ./syft/pkg/cataloger/something + +# 4. Regenerate +go generate ./internal/capabilities + +# 5. Commit +git add internal/capabilities/packages.yaml +git add syft/pkg/cataloger/something/test-fixtures/test-observations.json +git commit +``` + +#### If Changing Capabilities: + +**Impact:** MANUAL field, preserved across regeneration + +**Workflow:** +```bash +# 1. Edit packages.yaml directly +vim internal/capabilities/packages.yaml + +# 2. Validate +go test ./internal/capabilities/generate + +# 3. 
Commit +git commit internal/capabilities/packages.yaml +``` + +### Adding New Capability Fields + +if you need to add a completely new capability field (e.g., `package_manager.build_tool_info`): + +**Steps:** +1. Add field name to known fields in `TestCapabilityFieldNaming` (completeness_test.go) +2. Add value type validation to `validateCapabilityValueType()` (completeness_test.go) +3. Update file header documentation in packages.yaml +4. Add the field to relevant catalogers in packages.yaml +5. Update any runtime code that consumes capabilities + +### When to Update Exceptions + +#### Add to `catalogerTypeOverrides`: +- Discovery incorrectly classifies a cataloger's type +- Example: cataloger uses generic framework but behaves like custom + +#### Add to `catalogerConfigExceptions`: +- Cataloger should not have config linked +- Example: simple catalogers with no configuration + +#### Add to `catalogerConfigOverrides`: +- Automatic config linking fails +- Cataloger in a subpackage or unusual structure +- Example: dotnet catalogers split across multiple packages + +#### Add to `metadataTypeCoverageExceptions`: +- Metadata type is deprecated or intentionally unused +- Example: `MicrosoftKbPatch` (special case type) + +#### Add to `packageTypeCoverageExceptions`: +- Package type is deprecated or special case +- Example: `JenkinsPluginPkg`, `KbPkg` + +#### Add to `observationExceptions`: +- Cataloger lacks reliable test fixtures (e.g., requires specific binaries) +- Cataloger produces relationships but they're not standard dependencies +- Example: `graalvm-native-image-cataloger` (requires native images) + +## File Inventory + +### Core Generation + +- **`main.go`**: entry point, orchestrates regeneration, prints status messages +- **`merge.go`**: core merging logic, preserves manual sections while updating auto-generated +- **`io.go`**: YAML reading/writing with comment preservation using gopkg.in/yaml.v3 + +### Discovery + +- **`discover_catalogers.go`**: AST parsing 
to discover generic catalogers and parsers from source code +- **`discover_cataloger_configs.go`**: AST parsing to discover cataloger config structs +- **`discover_app_config.go`**: AST parsing to discover application-level config from options package +- **`cataloger_config_linking.go`**: links catalogers to config structs by analyzing constructors +- **`discover_metadata.go`**: reads test-observations.json files to get metadata/package types + +### Validation & Utilities + +- **`completeness_test.go`**: comprehensive test suite ensuring packages.yaml is complete and synced +- **`cataloger_names.go`**: helper to get all cataloger names from syft task factories +- **`metadata_check.go`**: validates metadata and package type coverage + +### Tests + +- **`cataloger_config_linking_test.go`**: tests for config linking and its AST extraction helpers +- **`merge_test.go`**: tests for merge logic + +Note: detector config field validation lives in `completeness_test.go` (`TestDetectorConfigFieldReferences`). + +## Troubleshooting + +### "Cataloger X not found in packages.yaml" + +**Cause:** you added a new cataloger but didn't regenerate packages.yaml + +**Fix:** +```bash +go generate ./internal/capabilities +``` + +### "Cataloger X in YAML but not in binary" + +**Cause:** you removed a cataloger but didn't regenerate + +**Fix:** +```bash +go generate ./internal/capabilities +# Review the diff - the cataloger entry should be removed +``` + +### "Metadata type X not represented in any cataloger" + +**Cause:** you added a new metadata type but: +- No cataloger produces it yet, OR +- Tests don't use pkgtest helpers (so observations aren't generated) + +**Fix Option 1 - Add test observations:** +```go +// Update test to use pkgtest +pkgtest.NewCatalogTester(). + FromDirectory(t, "test-fixtures/my-fixture"). 
+ TestCataloger(t, myCataloger) + +// Run tests +go test ./syft/pkg/cataloger/mypackage + +// Regenerate +go generate ./internal/capabilities +``` + +**Fix Option 2 - Add exception (if intentionally unused):** +```go +// completeness_test.go +var metadataTypeCoverageExceptions = strset.New( + reflect.TypeOf(pkg.MyNewType{}).Name(), +) +``` + +### "Parser X has no test observations" + +**Cause:** test doesn't use pkgtest helpers + +**Fix:** +```go +// Before: +func TestMyParser(t *testing.T) { + // manual test code +} + +// After: +func TestMyParser(t *testing.T) { + cataloger := NewMyCataloger() + pkgtest.NewCatalogTester(). + FromDirectory(t, "test-fixtures/my-fixture"). + TestCataloger(t, cataloger) +} +``` + +### "Config field X not found in struct Y" + +**Cause:** capability condition references a non-existent config field + +**Fix:** edit packages.yaml and correct the field name: +```yaml +# Before: +conditions: + - when: {SerachRemoteLicenses: true} # typo! + +# After: +conditions: + - when: {SearchRemoteLicenses: true} +``` + +### "Evidence field X.Y not found in struct X" + +**Cause:** +- Typo in evidence reference, OR +- Struct was refactored and field moved/renamed + +**Fix:** edit packages.yaml and correct the evidence reference: +```yaml +# Before: +evidence: + - AlpmDBEntry.FileListing # wrong field name + +# After: +evidence: + - AlpmDBEntry.Files +``` + +### "packages.yaml has uncommitted changes after regeneration" + +**Cause:** packages.yaml is out of date (usually caught in CI) + +**Fix:** +```bash +go generate ./internal/capabilities +git add internal/capabilities/packages.yaml +git commit -m "chore: regenerate capabilities" +``` + +### Generator Fails with "struct X not found" + +**Cause:** config linking trying to link to a non-existent struct + +**Fix Option 1 - Add override:** +```go +// merge.go +var catalogerConfigOverrides = map[string]string{ + "my-cataloger": "mypackage.MyConfig", +} +``` + +**Fix Option 2 - Add exception:** +```go +// 
merge.go +var catalogerConfigExceptions = strset.New( + "my-cataloger", // doesn't use config +) +``` + +### "Parser capabilities must be defined" + +**Cause:** parser in packages.yaml has no capabilities section + +**Fix:** add capabilities to the parser: +```yaml +parsers: + - function: parseMyFormat + capabilities: + - name: license + default: false + - name: dependency.depth + default: [] + # ... (add all required capability fields) +``` + +### Understanding Error Messages + +most test failures include detailed guidance. Look for: +- **List of missing items:** tells you exactly what to add/remove +- **Suggestions:** usually includes the command to fix (e.g., "Run 'go generate ./internal/capabilities'") +- **File locations:** tells you which file to edit + +**General debugging approach:** +1. Read the full error message +2. Check if it's fixed by regeneration +3. Check for recent code/test changes +4. Consider if it should be an exception +5. Ask for help if still stuck (include full error message) + +--- + +## Questions or Issues? + +if you encounter problems not covered here: +1. Check test error messages (they're usually quite helpful) +2. Look at recent commits for examples of similar changes +3. Ask in the team chat with the full error message diff --git a/internal/capabilities/generate/cataloger_config_linking.go b/internal/capabilities/generate/cataloger_config_linking.go index b9dadc698..41b5b5db8 100644 --- a/internal/capabilities/generate/cataloger_config_linking.go +++ b/internal/capabilities/generate/cataloger_config_linking.go @@ -1,3 +1,4 @@ +// this file links catalogers to their configuration structs by analyzing constructor function signatures to determine which config struct each cataloger uses. package main import ( @@ -16,8 +17,18 @@ import ( // Returns empty string for catalogers that don't take a config parameter. 
func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) { catalogerRoot := filepath.Join(repoRoot, "syft", "pkg", "cataloger") + return LinkCatalogersToConfigsFromPath(catalogerRoot, repoRoot) +} - // find all .go files under syft/pkg/cataloger/ recursively +// LinkCatalogersToConfigsFromPath analyzes cataloger constructor functions in the specified directory +// to determine which config struct each cataloger uses. This is the parameterized version that allows +// testing with custom fixture directories. +// Returns a map where key is the cataloger name (e.g., "go-module-binary-cataloger") +// and value is the config struct reference (e.g., "golang.CatalogerConfig"). +// Returns empty string for catalogers that don't take a config parameter. +// The baseRoot parameter is used for relative path calculation to determine package names. +func LinkCatalogersToConfigsFromPath(catalogerRoot, baseRoot string) (map[string]string, error) { + // find all .go files under the cataloger root recursively var files []string err := filepath.Walk(catalogerRoot, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -35,7 +46,7 @@ func LinkCatalogersToConfigs(repoRoot string) (map[string]string, error) { linkages := make(map[string]string) for _, file := range files { - links, err := linkCatalogersInFile(file, repoRoot) + links, err := linkCatalogersInFile(file, baseRoot) if err != nil { return nil, fmt.Errorf("failed to parse %s: %w", file, err) } @@ -324,7 +335,5 @@ func looksLikeConfigType(typeName string) bool { structName := parts[len(parts)-1] // check for common config patterns - return strings.Contains(structName, "Config") || - strings.HasSuffix(structName, "Config") || - strings.HasPrefix(structName, "Config") + return strings.Contains(structName, "Config") } diff --git a/internal/capabilities/generate/cataloger_config_linking_test.go b/internal/capabilities/generate/cataloger_config_linking_test.go index 22621a8c0..aae014686 100644 --- 
a/internal/capabilities/generate/cataloger_config_linking_test.go +++ b/internal/capabilities/generate/cataloger_config_linking_test.go @@ -1,12 +1,20 @@ package main import ( + "go/ast" + "go/parser" + "go/token" + "path/filepath" "testing" "github.com/stretchr/testify/require" ) func TestLinkCatalogersToConfigs(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + repoRoot, err := RepoRoot() require.NoError(t, err) @@ -124,7 +132,107 @@ func TestLinkCatalogersToConfigs(t *testing.T) { require.GreaterOrEqual(t, len(withConfig), 6, "should find at least 6 catalogers with configs") } +func TestLinkCatalogersToConfigsFromPath(t *testing.T) { + tests := []struct { + name string + fixturePath string + expectedLinkages map[string]string + wantErr require.ErrorAssertionFunc + }{ + { + name: "simple generic cataloger with local config", + fixturePath: "simple-generic-cataloger", + expectedLinkages: map[string]string{ + "go-module-cataloger": "golang.CatalogerConfig", + }, + }, + { + name: "cataloger name from constant", + fixturePath: "cataloger-with-constant", + expectedLinkages: map[string]string{ + "python-package-cataloger": "python.CatalogerConfig", + }, + }, + { + name: "custom cataloger with Name() in same file", + fixturePath: "custom-cataloger-same-file", + expectedLinkages: map[string]string{ + "java-pom-cataloger": "java.ArchiveCatalogerConfig", + }, + }, + { + name: "custom cataloger with Name() in different file - not detected", + fixturePath: "custom-cataloger-different-file", + expectedLinkages: map[string]string{ + // empty - current limitation, cannot detect cross-file Names + }, + }, + { + name: "cataloger without config parameter", + fixturePath: "no-config-cataloger", + expectedLinkages: map[string]string{ + "javascript-cataloger": "", // empty string means no config + }, + }, + { + name: "imported config type", + fixturePath: "imported-config-type", + expectedLinkages: map[string]string{ + 
"linux-kernel-cataloger": "kernel.LinuxKernelCatalogerConfig", + }, + }, + { + name: "non-config first parameter", + fixturePath: "non-config-first-param", + expectedLinkages: map[string]string{ + "binary-cataloger": "", // Parser not a config type + }, + }, + { + name: "conflicting cataloger names", + fixturePath: "conflicting-names", + wantErr: require.Error, + }, + { + name: "mixed naming patterns", + fixturePath: "mixed-naming-patterns", + expectedLinkages: map[string]string{ + "ruby-cataloger": "ruby.Config", + }, + }, + { + name: "selector expression config", + fixturePath: "selector-expression-config", + expectedLinkages: map[string]string{ + "rust-cataloger": "cargo.CatalogerConfig", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + fixtureDir := filepath.Join("test-fixtures", "config-linking", tt.fixturePath) + linkages, err := LinkCatalogersToConfigsFromPath(fixtureDir, fixtureDir) + tt.wantErr(t, err) + + if err != nil { + return + } + + require.Equal(t, tt.expectedLinkages, linkages) + }) + } +} + func TestExtractConfigTypeName(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + tests := []struct { name string catalogerName string @@ -240,3 +348,162 @@ func TestLooksLikeConfigType(t *testing.T) { }) } } + +func TestExtractReceiverTypeName(t *testing.T) { + tests := []struct { + name string + receiver string // receiver code snippet + want string + }{ + { + name: "value receiver", + receiver: "func (c Cataloger) Name() string { return \"\" }", + want: "Cataloger", + }, + { + name: "pointer receiver", + receiver: "func (c *Cataloger) Name() string { return \"\" }", + want: "Cataloger", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // parse the function to get the receiver type + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "", "package test\n"+tt.receiver, 0) + 
require.NoError(t, err) + + // extract the function declaration + require.Len(t, file.Decls, 1) + funcDecl, ok := file.Decls[0].(*ast.FuncDecl) + require.True(t, ok) + + // get receiver type + var recvType ast.Expr + if funcDecl.Recv != nil && len(funcDecl.Recv.List) > 0 { + recvType = funcDecl.Recv.List[0].Type + } + + got := extractReceiverTypeName(recvType) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractConfigTypeNameHelper(t *testing.T) { + tests := []struct { + name string + funcSig string // function signature with parameter + localPackageName string + want string + }{ + { + name: "local type", + funcSig: "func New(cfg CatalogerConfig) pkg.Cataloger { return nil }", + localPackageName: "python", + want: "python.CatalogerConfig", + }, + { + name: "imported type", + funcSig: "func New(cfg java.ArchiveCatalogerConfig) pkg.Cataloger { return nil }", + localPackageName: "python", + want: "java.ArchiveCatalogerConfig", + }, + { + name: "imported type - kernel package", + funcSig: "func New(cfg kernel.LinuxKernelCatalogerConfig) pkg.Cataloger { return nil }", + localPackageName: "other", + want: "kernel.LinuxKernelCatalogerConfig", + }, + { + name: "no parameters", + funcSig: "func New() pkg.Cataloger { return nil }", + localPackageName: "python", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // parse the function to get parameter type + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcSig, 0) + require.NoError(t, err) + + // extract the function declaration + require.Len(t, file.Decls, 1) + funcDecl, ok := file.Decls[0].(*ast.FuncDecl) + require.True(t, ok) + + // get first parameter type + var paramType ast.Expr + if funcDecl.Type.Params != nil && len(funcDecl.Type.Params.List) > 0 { + paramType = funcDecl.Type.Params.List[0].Type + } + + got := extractConfigTypeName(paramType, tt.localPackageName) + require.Equal(t, tt.want, got) + }) + } +} + +func 
TestExtractReturnTypeName(t *testing.T) { + tests := []struct { + name string + funcDef string // complete function definition + want string + }{ + { + name: "pointer to composite literal", + funcDef: `func New() pkg.Cataloger { + return &javaCataloger{name: "test"} + }`, + want: "javaCataloger", + }, + { + name: "composite literal", + funcDef: `func New() pkg.Cataloger { + return pythonCataloger{name: "test"} + }`, + want: "pythonCataloger", + }, + { + name: "variable return", + funcDef: `func New() pkg.Cataloger { + c := &Cataloger{} + return c + }`, + want: "", + }, + { + name: "nil return", + funcDef: `func New() pkg.Cataloger { + return nil + }`, + want: "", + }, + { + name: "empty function body", + funcDef: `func New() pkg.Cataloger {}`, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // parse the function + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "", "package test\n"+tt.funcDef, 0) + require.NoError(t, err) + + // extract the function declaration + require.Len(t, file.Decls, 1) + funcDecl, ok := file.Decls[0].(*ast.FuncDecl) + require.True(t, ok) + + got := extractReturnTypeName(funcDecl) + require.Equal(t, tt.want, got) + }) + } +} diff --git a/internal/capabilities/generate/cataloger_names.go b/internal/capabilities/generate/cataloger_names.go index 9f1bfa432..85c31056a 100644 --- a/internal/capabilities/generate/cataloger_names.go +++ b/internal/capabilities/generate/cataloger_names.go @@ -1,3 +1,4 @@ +// this file retrieves the canonical list of cataloger names and their selectors from syft's task factories. 
package main import ( diff --git a/internal/capabilities/generate/completeness_test.go b/internal/capabilities/generate/completeness_test.go index 193165359..ba11708d7 100644 --- a/internal/capabilities/generate/completeness_test.go +++ b/internal/capabilities/generate/completeness_test.go @@ -1,3 +1,4 @@ +// this file verifies the claims made in packages.yaml against test observations and source code, ensuring cataloger capabilities are accurate and complete. package main import ( @@ -344,8 +345,8 @@ func TestCatalogerDataQuality(t *testing.T) { }) } -// TestRegenerateCapabilitiesDoesNotFail verifies that regeneration runs successfully -func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) { +// TestCapabilitiesAreUpToDate verifies that regeneration runs successfully +func TestCapabilitiesAreUpToDate(t *testing.T) { if os.Getenv("CI") == "" { t.Skip("skipping regeneration test in local environment") } @@ -366,9 +367,9 @@ func TestRegenerateCapabilitiesDoesNotFail(t *testing.T) { require.NoError(t, err, "packages.yaml has uncommitted changes after regeneration. 
Run 'go generate ./internal/capabilities' locally and commit the changes.") } -// TestAllCatalogersHaveObservations verifies that all catalogers have test observations, +// TestCatalogersHaveTestObservations verifies that all catalogers have test observations, // ensuring they are using the pkgtest helpers -func TestAllCatalogersHaveObservations(t *testing.T) { +func TestCatalogersHaveTestObservations(t *testing.T) { repoRoot, err := RepoRoot() require.NoError(t, err) @@ -1303,3 +1304,70 @@ func TestCapabilityEvidenceFieldReferences(t *testing.T) { }) } } + +// TestDetectorConfigFieldReferences validates that config field names referenced in detector +// conditions actually exist in the cataloger's config struct +func TestDetectorConfigFieldReferences(t *testing.T) { + repoRoot, err := RepoRoot() + require.NoError(t, err) + + // load the packages.yaml + doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml")) + require.NoError(t, err) + + // collect all validation errors before failing + var errors []string + + // check each cataloger's detectors + for _, cataloger := range doc.Catalogers { + if cataloger.Type != "custom" { + continue // only custom catalogers have detectors + } + + for detectorIdx, detector := range cataloger.Detectors { + // if detector has no conditions, skip validation + if len(detector.Conditions) == 0 { + continue + } + + // detector has conditions - cataloger must have a config + if cataloger.Config == "" { + errors = append(errors, + fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct", + cataloger.Name, detectorIdx)) + continue + } + + // load the cataloger's config struct + configEntry, exists := doc.Configs[cataloger.Config] + if !exists { + errors = append(errors, + fmt.Sprintf("Cataloger %q references config %q which doesn't exist", + cataloger.Name, cataloger.Config)) + continue + } + + // build a set of valid config field names + validFields := 
make(map[string]bool) + for _, field := range configEntry.Fields { + validFields[field.Key] = true + } + + // validate each condition + for condIdx, condition := range detector.Conditions { + for fieldName := range condition.When { + if !validFields[fieldName] { + errors = append(errors, + fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q", + cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config)) + } + } + } + } + } + + // report all errors at once + if len(errors) > 0 { + require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n")) + } +} diff --git a/internal/capabilities/generate/config_discovery_test.go b/internal/capabilities/generate/config_discovery_test.go deleted file mode 100644 index 556df8727..000000000 --- a/internal/capabilities/generate/config_discovery_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package main - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/stretchr/testify/require" -) - -func TestDiscoverConfigs(t *testing.T) { - repoRoot, err := RepoRoot() - require.NoError(t, err) - - configs, err := DiscoverConfigs(repoRoot) - require.NoError(t, err) - - // verify we discovered multiple config structs - require.NotEmpty(t, configs, "should discover at least one config struct") - - // check for known config structs that have app-config annotations - expectedConfigs := []string{ - "golang.CatalogerConfig", - "golang.MainModuleVersionConfig", - "java.ArchiveCatalogerConfig", - "python.CatalogerConfig", - "dotnet.CatalogerConfig", - "kernel.LinuxKernelCatalogerConfig", - "javascript.CatalogerConfig", - "nix.Config", - } - - for _, expected := range expectedConfigs { - config, ok := configs[expected] - require.True(t, ok, "should discover config: %s", expected) - require.NotEmpty(t, config.Fields, "config %s should have fields", expected) - require.Equal(t, expected, config.PackageName+"."+config.StructName) - } - - 
// verify golang.CatalogerConfig fields - golangConfig := configs["golang.CatalogerConfig"] - require.Equal(t, "golang", golangConfig.PackageName) - require.Equal(t, "CatalogerConfig", golangConfig.StructName) - require.NotEmpty(t, golangConfig.Fields) - - // check for specific field - var foundSearchLocalModCache bool - for _, field := range golangConfig.Fields { - if field.Name == "SearchLocalModCacheLicenses" { - foundSearchLocalModCache = true - require.Equal(t, "bool", field.Type) - require.Equal(t, "golang.search-local-mod-cache-licenses", field.AppKey) - require.NotEmpty(t, field.Description) - require.Contains(t, field.Description, "searching for go package licenses") - } - } - require.True(t, foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field") - - // verify nested config struct - golangMainModuleConfig := configs["golang.MainModuleVersionConfig"] - require.Equal(t, "golang", golangMainModuleConfig.PackageName) - require.Equal(t, "MainModuleVersionConfig", golangMainModuleConfig.StructName) - require.NotEmpty(t, golangMainModuleConfig.Fields) - - // check for specific nested field - var foundFromLDFlags bool - for _, field := range golangMainModuleConfig.Fields { - if field.Name == "FromLDFlags" { - foundFromLDFlags = true - require.Equal(t, "bool", field.Type) - require.Equal(t, "golang.main-module-version.from-ld-flags", field.AppKey) - require.NotEmpty(t, field.Description) - } - } - require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig") - - // print summary for manual inspection - t.Logf("Discovered %d config structs:", len(configs)) - for key, config := range configs { - t.Logf(" %s: %d fields", key, len(config.Fields)) - for _, field := range config.Fields { - t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey) - if diff := cmp.Diff("", field.Description); diff == "" { - t.Logf(" WARNING: field %s has no description", field.Name) - } - } - } -} - -func 
TestExtractPackageNameFromPath(t *testing.T) { - tests := []struct { - name string - filePath string - want string - }{ - { - name: "golang package", - filePath: "syft/pkg/cataloger/golang/config.go", - want: "golang", - }, - { - name: "java package", - filePath: "syft/pkg/cataloger/java/config.go", - want: "java", - }, - { - name: "python cataloger", - filePath: "syft/pkg/cataloger/python/cataloger.go", - want: "python", - }, - { - name: "kernel cataloger", - filePath: "syft/pkg/cataloger/kernel/cataloger.go", - want: "kernel", - }, - { - name: "binary classifier", - filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go", - want: "binary", - }, - { - name: "not a cataloger path", - filePath: "syft/pkg/other/file.go", - want: "", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := extractPackageNameFromPath(tt.filePath) - require.Equal(t, tt.want, got) - }) - } -} diff --git a/internal/capabilities/generate/detector_validation_test.go b/internal/capabilities/generate/detector_validation_test.go deleted file mode 100644 index 63ce1a6cb..000000000 --- a/internal/capabilities/generate/detector_validation_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package main - -import ( - "fmt" - "path/filepath" - "strings" - "testing" - - "github.com/stretchr/testify/require" -) - -// TestDetectorConfigFieldReferences validates that config field names referenced in detector -// conditions actually exist in the cataloger's config struct -func TestDetectorConfigFieldReferences(t *testing.T) { - repoRoot, err := RepoRoot() - require.NoError(t, err) - - // load the packages.yaml - doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml")) - require.NoError(t, err) - - // collect all validation errors before failing - var errors []string - - // check each cataloger's detectors - for _, cataloger := range doc.Catalogers { - if cataloger.Type != "custom" { - continue // only custom catalogers have detectors - } - - 
for detectorIdx, detector := range cataloger.Detectors { - // if detector has no conditions, skip validation - if len(detector.Conditions) == 0 { - continue - } - - // detector has conditions - cataloger must have a config - if cataloger.Config == "" { - errors = append(errors, - fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct", - cataloger.Name, detectorIdx)) - continue - } - - // load the cataloger's config struct - configEntry, exists := doc.Configs[cataloger.Config] - if !exists { - errors = append(errors, - fmt.Sprintf("Cataloger %q references config %q which doesn't exist", - cataloger.Name, cataloger.Config)) - continue - } - - // build a set of valid config field names - validFields := make(map[string]bool) - for _, field := range configEntry.Fields { - validFields[field.Key] = true - } - - // validate each condition - for condIdx, condition := range detector.Conditions { - for fieldName := range condition.When { - if !validFields[fieldName] { - errors = append(errors, - fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q", - cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config)) - } - } - } - } - } - - // report all errors at once - if len(errors) > 0 { - require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n")) - } -} diff --git a/internal/capabilities/generate/app_config_discovery.go b/internal/capabilities/generate/discover_app_config.go similarity index 65% rename from internal/capabilities/generate/app_config_discovery.go rename to internal/capabilities/generate/discover_app_config.go index 6a9c07e13..8c14b7a02 100644 --- a/internal/capabilities/generate/app_config_discovery.go +++ b/internal/capabilities/generate/discover_app_config.go @@ -1,3 +1,4 @@ +// this file discovers application-level configuration from cmd/syft/internal/options/ by parsing ecosystem config structs, their 
DescribeFields() methods, and default value functions. package main import ( @@ -5,6 +6,7 @@ import ( "go/ast" "go/parser" "go/token" + "os" "path/filepath" "reflect" "sort" @@ -18,30 +20,214 @@ type AppConfigField struct { DefaultValue interface{} // extracted from Default*() functions } +// extractEcosystemConfigFieldsFromCatalog parses catalog.go and extracts the ecosystem-specific +// config fields from the Catalog struct, returning a map of struct type name to YAML tag +func extractEcosystemConfigFieldsFromCatalog(catalogFilePath string) (map[string]string, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, catalogFilePath, nil, parser.ParseComments) + if err != nil { + return nil, fmt.Errorf("failed to parse catalog.go: %w", err) + } + + // find the Catalog struct + catalogStruct := findConfigStruct(f, "Catalog") + if catalogStruct == nil { + return nil, fmt.Errorf("catalog struct not found in %s", catalogFilePath) + } + + // extract ecosystem config fields from the Catalog struct + // these are between the "ecosystem-specific cataloger configuration" comment and the next section + ecosystemConfigs := make(map[string]string) + inEcosystemSection := false + + for _, field := range catalogStruct.Fields.List { + // check for ecosystem section marker comment + if field.Doc != nil { + for _, comment := range field.Doc.List { + if strings.Contains(comment.Text, "ecosystem-specific cataloger configuration") { + inEcosystemSection = true + break + } + // check if we've hit the next section (any comment marking a new section) + if inEcosystemSection && strings.HasPrefix(comment.Text, "// configuration for") { + inEcosystemSection = false + break + } + } + } + + if !inEcosystemSection { + continue + } + + // extract field type and yaml tag + if len(field.Names) == 0 { + continue + } + + // get the type name (e.g., "golangConfig") + var typeName string + if ident, ok := field.Type.(*ast.Ident); ok { + typeName = ident.Name + } else { + continue + } 
+ + // get the yaml tag + yamlTag := extractYAMLTag(field) + if yamlTag == "" || yamlTag == "-" { + continue + } + + ecosystemConfigs[typeName] = yamlTag + } + + return ecosystemConfigs, nil +} + +// findFilesWithCatalogerImports scans the options directory for .go files that import +// from "github.com/anchore/syft/syft/pkg/cataloger/*" packages +func findFilesWithCatalogerImports(optionsDir string) ([]string, error) { + entries, err := os.ReadDir(optionsDir) + if err != nil { + return nil, fmt.Errorf("failed to read options directory: %w", err) + } + + var candidateFiles []string + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".go") { + continue + } + + filePath := filepath.Join(optionsDir, entry.Name()) + + // parse the file to check imports + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, filePath, nil, parser.ImportsOnly) + if err != nil { + continue // skip files that can't be parsed + } + + // check if file imports from cataloger packages + for _, imp := range f.Imports { + importPath := strings.Trim(imp.Path.Value, `"`) + if strings.HasPrefix(importPath, "github.com/anchore/syft/syft/pkg/cataloger/") { + candidateFiles = append(candidateFiles, filePath) + break + } + } + } + + return candidateFiles, nil +} + +// extractConfigStructTypes parses a Go file and returns all struct type names defined in it +func extractConfigStructTypes(filePath string) ([]string, error) { + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, filePath, nil, 0) + if err != nil { + return nil, fmt.Errorf("failed to parse %s: %w", filePath, err) + } + + var structTypes []string + for _, decl := range f.Decls { + genDecl, ok := decl.(*ast.GenDecl) + if !ok || genDecl.Tok != token.TYPE { + continue + } + + for _, spec := range genDecl.Specs { + typeSpec, ok := spec.(*ast.TypeSpec) + if !ok { + continue + } + + // check if it's a struct type + if _, ok := typeSpec.Type.(*ast.StructType); ok { + structTypes = 
append(structTypes, typeSpec.Name.Name) + } + } + } + + return structTypes, nil +} + +// discoverCatalogerConfigs discovers cataloger config files by: +// 1. Finding files with cataloger imports in options directory +// 2. Extracting ecosystem config fields from Catalog struct +// 3. Matching file structs against Catalog fields +// Returns a map of file path to top-level YAML key +func discoverCatalogerConfigs(repoRoot string) (map[string]string, error) { + optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options") + catalogFilePath := filepath.Join(optionsDir, "catalog.go") + + // get ecosystem config fields from Catalog struct + ecosystemConfigs, err := extractEcosystemConfigFieldsFromCatalog(catalogFilePath) + if err != nil { + return nil, err + } + + if len(ecosystemConfigs) == 0 { + return nil, fmt.Errorf("no ecosystem config fields found in Catalog struct") + } + + // find files with cataloger imports + candidateFiles, err := findFilesWithCatalogerImports(optionsDir) + if err != nil { + return nil, err + } + + // match candidate files against Catalog ecosystem fields + fileToKey := make(map[string]string) + foundStructs := make(map[string]bool) + + for _, filePath := range candidateFiles { + structTypes, err := extractConfigStructTypes(filePath) + if err != nil { + return nil, err + } + + // check if any struct type matches an ecosystem config + for _, structType := range structTypes { + if yamlKey, exists := ecosystemConfigs[structType]; exists { + fileToKey[filePath] = yamlKey + foundStructs[structType] = true + break + } + } + } + + // validate that all ecosystem configs were found + var missingConfigs []string + for structType := range ecosystemConfigs { + if !foundStructs[structType] { + missingConfigs = append(missingConfigs, structType) + } + } + + if len(missingConfigs) > 0 { + sort.Strings(missingConfigs) + return nil, fmt.Errorf("could not find files for ecosystem configs: %s", strings.Join(missingConfigs, ", ")) + } + + return 
fileToKey, nil +} + // DiscoverAppConfigs discovers all application-level cataloger configuration fields // from the options package func DiscoverAppConfigs(repoRoot string) ([]AppConfigField, error) { - optionsDir := filepath.Join(repoRoot, "cmd", "syft", "internal", "options") - - // parse all .go files in the options directory to extract configuration fields - configs := []AppConfigField{} - - // define the config files we want to parse with their top-level keys - configFiles := map[string]string{ - "dotnet.go": "dotnet", - "golang.go": "golang", - "java.go": "java", - "javascript.go": "javascript", - "linux_kernel.go": "linux-kernel", - "nix.go": "nix", - "python.go": "python", + // discover cataloger config files dynamically + configFiles, err := discoverCatalogerConfigs(repoRoot) + if err != nil { + return nil, fmt.Errorf("failed to discover cataloger configs: %w", err) } - for filename, topLevelKey := range configFiles { - filePath := filepath.Join(optionsDir, filename) + // extract configuration fields from each discovered file + var configs []AppConfigField + for filePath, topLevelKey := range configFiles { fields, err := extractAppConfigFields(filePath, topLevelKey) if err != nil { - return nil, fmt.Errorf("failed to extract config from %s: %w", filename, err) + return nil, fmt.Errorf("failed to extract config from %s: %w", filePath, err) } configs = append(configs, fields...) 
} diff --git a/internal/capabilities/generate/discover_app_config_test.go b/internal/capabilities/generate/discover_app_config_test.go new file mode 100644 index 000000000..de353571f --- /dev/null +++ b/internal/capabilities/generate/discover_app_config_test.go @@ -0,0 +1,413 @@ +package main + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDetermineExpectedConfigName(t *testing.T) { + tests := []struct { + name string + topLevelKey string + wantName string + }{ + { + name: "linux-kernel special case", + topLevelKey: "linux-kernel", + wantName: "linuxKernelConfig", + }, + { + name: "javascript special case", + topLevelKey: "javascript", + wantName: "javaScriptConfig", + }, + { + name: "standard config golang", + topLevelKey: "golang", + wantName: "golangConfig", + }, + { + name: "standard config python", + topLevelKey: "python", + wantName: "pythonConfig", + }, + { + name: "standard config java", + topLevelKey: "java", + wantName: "javaConfig", + }, + { + name: "standard config dotnet", + topLevelKey: "dotnet", + wantName: "dotnetConfig", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := determineExpectedConfigName(tt.topLevelKey) + require.Equal(t, tt.wantName, got) + }) + } +} + +func TestCleanDescription(t *testing.T) { + tests := []struct { + name string + desc string + want string + }{ + { + name: "single line no extra whitespace", + desc: "this is a description", + want: "this is a description", + }, + { + name: "multiple spaces collapsed", + desc: "this has multiple spaces", + want: "this has multiple spaces", + }, + { + name: "multi-line description", + desc: "this is a\nmulti-line\ndescription", + want: "this is a multi-line description", + }, + { + name: "leading and trailing whitespace", + desc: " \t description with spaces \t ", + want: "description with spaces", + }, + { + name: "tabs and newlines", + desc: "description\t\twith\n\ttabs", + want: 
"description with tabs", + }, + { + name: "empty string", + desc: "", + want: "", + }, + { + name: "only whitespace", + desc: " \n\t ", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := cleanDescription(tt.desc) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractYAMLTag(t *testing.T) { + tests := []struct { + name string + tagStr string + want string + }{ + { + name: "simple yaml tag", + tagStr: "`yaml:\"field-name\"`", + want: "field-name", + }, + { + name: "yaml tag with omitempty", + tagStr: "`yaml:\"field-name,omitempty\"`", + want: "field-name", + }, + { + name: "yaml tag with multiple options", + tagStr: "`yaml:\"field-name,omitempty,inline\"`", + want: "field-name", + }, + { + name: "yaml tag dash means skip", + tagStr: "`yaml:\"-\"`", + want: "-", + }, + { + name: "no yaml tag", + tagStr: "`json:\"field-name\"`", + want: "", + }, + { + name: "empty tag", + tagStr: "", + want: "", + }, + { + name: "yaml tag with json tag", + tagStr: "`yaml:\"yaml-name\" json:\"json-name\"`", + want: "yaml-name", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // construct a minimal ast.Field with the tag + field := &ast.Field{} + if tt.tagStr != "" { + field.Tag = &ast.BasicLit{ + Kind: token.STRING, + Value: tt.tagStr, + } + } + + got := extractYAMLTag(field) + require.Equal(t, tt.want, got) + }) + } +} + +func TestIsNestedStruct(t *testing.T) { + tests := []struct { + name string + expr ast.Expr + want bool + }{ + { + name: "custom struct type", + expr: &ast.Ident{Name: "MainModuleVersion"}, + want: true, + }, + { + name: "string type", + expr: &ast.Ident{Name: "string"}, + want: false, + }, + { + name: "int type", + expr: &ast.Ident{Name: "int"}, + want: false, + }, + { + name: "bool type", + expr: &ast.Ident{Name: "bool"}, + want: false, + }, + { + name: "pointer type", + expr: &ast.StarExpr{X: &ast.Ident{Name: "Config"}}, + want: false, + }, + { + name: "array type", + expr: 
&ast.ArrayType{Elt: &ast.Ident{Name: "string"}}, + want: false, + }, + { + name: "map type", + expr: &ast.MapType{ + Key: &ast.Ident{Name: "string"}, + Value: &ast.Ident{Name: "string"}, + }, + want: false, + }, + { + name: "int32 type", + expr: &ast.Ident{Name: "int32"}, + want: false, + }, + { + name: "uint64 type", + expr: &ast.Ident{Name: "uint64"}, + want: false, + }, + { + name: "float64 type", + expr: &ast.Ident{Name: "float64"}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isNestedStruct(tt.expr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractStringLiteral(t *testing.T) { + tests := []struct { + name string + expr ast.Expr + want string + }{ + { + name: "double quoted string", + expr: &ast.BasicLit{ + Kind: token.STRING, + Value: `"hello world"`, + }, + want: "hello world", + }, + { + name: "backtick string", + expr: &ast.BasicLit{ + Kind: token.STRING, + Value: "`hello world`", + }, + want: "hello world", + }, + { + name: "empty string", + expr: &ast.BasicLit{ + Kind: token.STRING, + Value: `""`, + }, + want: "", + }, + { + name: "string with spaces", + expr: &ast.BasicLit{ + Kind: token.STRING, + Value: `" spaces "`, + }, + want: " spaces ", + }, + { + name: "not a string literal (int)", + expr: &ast.BasicLit{ + Kind: token.INT, + Value: "42", + }, + want: "", + }, + { + name: "not a basic lit", + expr: &ast.Ident{Name: "someVar"}, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractStringLiteral(tt.expr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractFieldPathFromRef(t *testing.T) { + tests := []struct { + name string + src string + want string + }{ + { + name: "simple field reference", + src: "&o.Field", + want: "Field", + }, + { + name: "nested field reference", + src: "&o.Parent.Field", + want: "Parent.Field", + }, + { + name: "deeply nested field reference", + src: "&o.MainModuleVersion.FromLDFlags", + want: 
"MainModuleVersion.FromLDFlags", + }, + { + name: "three levels deep", + src: "&o.Level1.Level2.Level3", + want: "Level1.Level2.Level3", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // parse the expression + expr, err := parser.ParseExpr(tt.src) + require.NoError(t, err) + + got := extractFieldPathFromRef(expr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractAppValue(t *testing.T) { + tests := []struct { + name string + src string + want interface{} + }{ + { + name: "string literal", + src: `"hello"`, + want: "hello", + }, + { + name: "int literal", + src: "42", + want: "42", + }, + { + name: "float literal", + src: "3.14", + want: "3.14", + }, + { + name: "bool true", + src: "true", + want: true, + }, + { + name: "bool false", + src: "false", + want: false, + }, + { + name: "nil value", + src: "nil", + want: nil, + }, + { + name: "empty string", + src: `""`, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // parse the expression + expr, err := parser.ParseExpr(tt.src) + require.NoError(t, err) + + got := extractAppValue(expr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractAppValue_NestedStruct(t *testing.T) { + // test nested struct separately since it returns a map + src := `struct{Field1 string; Field2 bool}{Field1: "value", Field2: true}` + + // parse as a composite literal + expr, err := parser.ParseExpr(src) + require.NoError(t, err) + + // extract the composite literal + compositeLit, ok := expr.(*ast.CompositeLit) + require.True(t, ok) + + got := extractAppValue(compositeLit) + + // verify it's a map with the expected values + gotMap, ok := got.(map[string]interface{}) + require.True(t, ok) + require.Equal(t, "value", gotMap["Field1"]) + require.Equal(t, true, gotMap["Field2"]) +} diff --git a/internal/capabilities/generate/config_discovery.go b/internal/capabilities/generate/discover_cataloger_configs.go similarity index 98% rename from 
internal/capabilities/generate/config_discovery.go rename to internal/capabilities/generate/discover_cataloger_configs.go index 14747ba4b..f1d6edf2f 100644 --- a/internal/capabilities/generate/config_discovery.go +++ b/internal/capabilities/generate/discover_cataloger_configs.go @@ -1,3 +1,4 @@ +// this file discovers cataloger configuration structs using AST parsing to find Config structs and extract fields with app-config annotations. package main import ( @@ -135,9 +136,7 @@ func discoverConfigsInFile(path, repoRoot string) (map[string]ConfigInfo, error) // isConfigStruct determines if a struct name looks like a configuration struct func isConfigStruct(name string) bool { // check for common config patterns - return strings.Contains(name, "Config") || - strings.HasSuffix(name, "Config") || - strings.HasPrefix(name, "Config") + return strings.Contains(name, "Config") } // extractCatalogerConfigFields parses struct fields and extracts their metadata diff --git a/internal/capabilities/generate/discover_cataloger_configs_test.go b/internal/capabilities/generate/discover_cataloger_configs_test.go new file mode 100644 index 000000000..fadcae887 --- /dev/null +++ b/internal/capabilities/generate/discover_cataloger_configs_test.go @@ -0,0 +1,455 @@ +package main + +import ( + "go/ast" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/require" +) + +// expected config structs that should be discovered with app-config annotations +var expectedCatalogConfigs = []string{ + "golang.CatalogerConfig", + "golang.MainModuleVersionConfig", + "java.ArchiveCatalogerConfig", + "python.CatalogerConfig", + "dotnet.CatalogerConfig", + "kernel.LinuxKernelCatalogerConfig", + "javascript.CatalogerConfig", + "nix.Config", +} + +func TestDiscoverConfigs(t *testing.T) { + repoRoot, err := RepoRoot() + require.NoError(t, err) + + configs, err := DiscoverConfigs(repoRoot) + require.NoError(t, err) + + // verify we discovered multiple config structs + 
require.NotEmpty(t, configs, "should discover at least one config struct") + + // check for known config structs that have app-config annotations + for _, expected := range expectedCatalogConfigs { + config, ok := configs[expected] + require.True(t, ok, "should discover config: %s", expected) + require.NotEmpty(t, config.Fields, "config %s should have fields", expected) + require.Equal(t, expected, config.PackageName+"."+config.StructName) + } + + // verify golang.CatalogerConfig structure + golangConfig := configs["golang.CatalogerConfig"] + wantGolangConfig := ConfigInfo{ + PackageName: "golang", + StructName: "CatalogerConfig", + } + if diff := cmp.Diff(wantGolangConfig.PackageName, golangConfig.PackageName); diff != "" { + t.Errorf("golang.CatalogerConfig.PackageName mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantGolangConfig.StructName, golangConfig.StructName); diff != "" { + t.Errorf("golang.CatalogerConfig.StructName mismatch (-want +got):\n%s", diff) + } + require.NotEmpty(t, golangConfig.Fields) + + // check for specific field + var foundSearchLocalModCache bool + for _, field := range golangConfig.Fields { + if field.Name == "SearchLocalModCacheLicenses" { + foundSearchLocalModCache = true + wantField := ConfigField{ + Name: "SearchLocalModCacheLicenses", + Type: "bool", + AppKey: "golang.search-local-mod-cache-licenses", + } + if diff := cmp.Diff(wantField.Name, field.Name); diff != "" { + t.Errorf("SearchLocalModCacheLicenses field Name mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantField.Type, field.Type); diff != "" { + t.Errorf("SearchLocalModCacheLicenses field Type mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" { + t.Errorf("SearchLocalModCacheLicenses field AppKey mismatch (-want +got):\n%s", diff) + } + require.NotEmpty(t, field.Description) + require.Contains(t, field.Description, "searching for go package licenses") + } + } + require.True(t, 
foundSearchLocalModCache, "should find SearchLocalModCacheLicenses field") + + // verify nested config struct + golangMainModuleConfig := configs["golang.MainModuleVersionConfig"] + wantMainModuleConfig := ConfigInfo{ + PackageName: "golang", + StructName: "MainModuleVersionConfig", + } + if diff := cmp.Diff(wantMainModuleConfig.PackageName, golangMainModuleConfig.PackageName); diff != "" { + t.Errorf("golang.MainModuleVersionConfig.PackageName mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantMainModuleConfig.StructName, golangMainModuleConfig.StructName); diff != "" { + t.Errorf("golang.MainModuleVersionConfig.StructName mismatch (-want +got):\n%s", diff) + } + require.NotEmpty(t, golangMainModuleConfig.Fields) + + // check for specific nested field + var foundFromLDFlags bool + for _, field := range golangMainModuleConfig.Fields { + if field.Name == "FromLDFlags" { + foundFromLDFlags = true + wantField := ConfigField{ + Name: "FromLDFlags", + Type: "bool", + AppKey: "golang.main-module-version.from-ld-flags", + } + if diff := cmp.Diff(wantField.Name, field.Name); diff != "" { + t.Errorf("FromLDFlags field Name mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantField.Type, field.Type); diff != "" { + t.Errorf("FromLDFlags field Type mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantField.AppKey, field.AppKey); diff != "" { + t.Errorf("FromLDFlags field AppKey mismatch (-want +got):\n%s", diff) + } + require.NotEmpty(t, field.Description) + } + } + require.True(t, foundFromLDFlags, "should find FromLDFlags field in MainModuleVersionConfig") + + // print summary for manual inspection + t.Logf("Discovered %d config structs:", len(configs)) + for key, config := range configs { + t.Logf(" %s: %d fields", key, len(config.Fields)) + for _, field := range config.Fields { + t.Logf(" - %s (%s): %s", field.Name, field.Type, field.AppKey) + if diff := cmp.Diff("", field.Description); diff == "" { + t.Logf(" WARNING: field %s has no 
description", field.Name) + } + } + } +} + +func TestExtractPackageNameFromPath(t *testing.T) { + tests := []struct { + name string + filePath string + want string + }{ + { + name: "golang package", + filePath: "syft/pkg/cataloger/golang/config.go", + want: "golang", + }, + { + name: "java package", + filePath: "syft/pkg/cataloger/java/config.go", + want: "java", + }, + { + name: "python cataloger", + filePath: "syft/pkg/cataloger/python/cataloger.go", + want: "python", + }, + { + name: "kernel cataloger", + filePath: "syft/pkg/cataloger/kernel/cataloger.go", + want: "kernel", + }, + { + name: "binary classifier", + filePath: "syft/pkg/cataloger/binary/classifier_cataloger.go", + want: "binary", + }, + { + name: "not a cataloger path", + filePath: "syft/pkg/other/file.go", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractPackageNameFromPath(tt.filePath) + require.Equal(t, tt.want, got) + }) + } +} + +func TestFormatFieldType(t *testing.T) { + tests := []struct { + name string + expr ast.Expr + want string + }{ + { + name: "basic identifier - string", + expr: &ast.Ident{Name: "string"}, + want: "string", + }, + { + name: "basic identifier - bool", + expr: &ast.Ident{Name: "bool"}, + want: "bool", + }, + { + name: "basic identifier - int", + expr: &ast.Ident{Name: "int"}, + want: "int", + }, + { + name: "selector expression - package.Type", + expr: &ast.SelectorExpr{ + X: &ast.Ident{Name: "time"}, + Sel: &ast.Ident{Name: "Time"}, + }, + want: "time.Time", + }, + { + name: "selector expression - cataloging.Config", + expr: &ast.SelectorExpr{ + X: &ast.Ident{Name: "cataloging"}, + Sel: &ast.Ident{Name: "ArchiveSearchConfig"}, + }, + want: "cataloging.ArchiveSearchConfig", + }, + { + name: "array of strings", + expr: &ast.ArrayType{ + Elt: &ast.Ident{Name: "string"}, + }, + want: "[]string", + }, + { + name: "array of ints", + expr: &ast.ArrayType{ + Elt: &ast.Ident{Name: "int"}, + }, + want: "[]int", + }, + { + 
name: "map[string]bool", + expr: &ast.MapType{ + Key: &ast.Ident{Name: "string"}, + Value: &ast.Ident{Name: "bool"}, + }, + want: "map[string]bool", + }, + { + name: "map[string]int", + expr: &ast.MapType{ + Key: &ast.Ident{Name: "string"}, + Value: &ast.Ident{Name: "int"}, + }, + want: "map[string]int", + }, + { + name: "pointer to type", + expr: &ast.StarExpr{ + X: &ast.Ident{Name: "Config"}, + }, + want: "*Config", + }, + { + name: "pointer to selector", + expr: &ast.StarExpr{ + X: &ast.SelectorExpr{ + X: &ast.Ident{Name: "time"}, + Sel: &ast.Ident{Name: "Time"}, + }, + }, + want: "*time.Time", + }, + { + name: "interface{}", + expr: &ast.InterfaceType{ + Methods: &ast.FieldList{}, + }, + want: "interface{}", + }, + { + name: "nested array of arrays", + expr: &ast.ArrayType{ + Elt: &ast.ArrayType{ + Elt: &ast.Ident{Name: "string"}, + }, + }, + want: "[][]string", + }, + { + name: "map with array value", + expr: &ast.MapType{ + Key: &ast.Ident{Name: "string"}, + Value: &ast.ArrayType{ + Elt: &ast.Ident{Name: "int"}, + }, + }, + want: "map[string][]int", + }, + { + name: "pointer to array", + expr: &ast.StarExpr{ + X: &ast.ArrayType{ + Elt: &ast.Ident{Name: "string"}, + }, + }, + want: "*[]string", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := formatFieldType(tt.expr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractFieldComments(t *testing.T) { + tests := []struct { + name string + commentGroup *ast.CommentGroup + wantDescription string + wantAppKey string + }{ + { + name: "nil comment group", + commentGroup: nil, + wantDescription: "", + wantAppKey: "", + }, + { + name: "empty comment group", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{}, + }, + wantDescription: "", + wantAppKey: "", + }, + { + name: "app-config annotation only", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// app-config: golang.search-local-mod-cache-licenses"}, + }, + }, + wantDescription: "", + 
wantAppKey: "golang.search-local-mod-cache-licenses", + }, + { + name: "description only", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// enable searching for go package licenses in the local mod cache"}, + }, + }, + wantDescription: "enable searching for go package licenses in the local mod cache", + wantAppKey: "", + }, + { + name: "description and app-config", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// enable searching for go package licenses in the local mod cache"}, + {Text: "// app-config: golang.search-local-mod-cache-licenses"}, + }, + }, + wantDescription: "enable searching for go package licenses in the local mod cache", + wantAppKey: "golang.search-local-mod-cache-licenses", + }, + { + name: "app-config before description", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// app-config: golang.search-local-mod-cache-licenses"}, + {Text: "// enable searching for go package licenses in the local mod cache"}, + }, + }, + wantDescription: "enable searching for go package licenses in the local mod cache", + wantAppKey: "golang.search-local-mod-cache-licenses", + }, + { + name: "multi-line description", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// this is the first line of the description."}, + {Text: "// this is the second line of the description."}, + {Text: "// app-config: test.multi-line"}, + }, + }, + wantDescription: "this is the first line of the description. 
this is the second line of the description.", + wantAppKey: "test.multi-line", + }, + { + name: "app-config with extra whitespace", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// app-config: golang.test-key "}, + }, + }, + wantDescription: "", + wantAppKey: "golang.test-key", + }, + { + name: "description with special characters", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// enable searching for Go's package licenses (*.mod files)"}, + {Text: "// app-config: golang.search"}, + }, + }, + wantDescription: "enable searching for Go's package licenses (*.mod files)", + wantAppKey: "golang.search", + }, + { + name: "comment with empty lines", + commentGroup: &ast.CommentGroup{ + List: []*ast.Comment{ + {Text: "// first line"}, + {Text: "//"}, + {Text: "// second line"}, + {Text: "// app-config: test.key"}, + }, + }, + wantDescription: "first line second line", + wantAppKey: "test.key", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotDescription, gotAppKey := extractFieldComments(tt.commentGroup) + require.Equal(t, tt.wantDescription, gotDescription) + require.Equal(t, tt.wantAppKey, gotAppKey) + }) + } +} + +func TestDiscoverAllowedConfigStructs(t *testing.T) { + repoRoot, err := RepoRoot() + require.NoError(t, err) + + allowedConfigs, err := DiscoverAllowedConfigStructs(repoRoot) + require.NoError(t, err) + + // verify we found multiple config types + require.NotEmpty(t, allowedConfigs, "should discover at least one allowed config type") + + // verify specific config types that should be in pkgcataloging.Config + expectedConfigs := []string{ + "golang.CatalogerConfig", + "java.ArchiveCatalogerConfig", + "python.CatalogerConfig", + "dotnet.CatalogerConfig", + "kernel.LinuxKernelCatalogerConfig", + "javascript.CatalogerConfig", + } + + for _, expected := range expectedConfigs { + require.True(t, allowedConfigs[expected], "should find %s in allowed configs", expected) + } + + // log all 
discovered configs for manual inspection + t.Logf("Discovered %d allowed config types:", len(allowedConfigs)) + for configType := range allowedConfigs { + t.Logf(" - %s", configType) + } +} diff --git a/internal/capabilities/generate/discover.go b/internal/capabilities/generate/discover_catalogers.go similarity index 98% rename from internal/capabilities/generate/discover.go rename to internal/capabilities/generate/discover_catalogers.go index 111142133..f66618875 100644 --- a/internal/capabilities/generate/discover.go +++ b/internal/capabilities/generate/discover_catalogers.go @@ -1,3 +1,4 @@ +// this file discovers generic catalogers from source code by walking syft/pkg/cataloger/ and using AST parsing to find generic.NewCataloger() calls and extract parser information. package main import ( diff --git a/internal/capabilities/generate/discover_catalogers_test.go b/internal/capabilities/generate/discover_catalogers_test.go new file mode 100644 index 000000000..0f0c5d605 --- /dev/null +++ b/internal/capabilities/generate/discover_catalogers_test.go @@ -0,0 +1,389 @@ +package main + +import ( + "go/ast" + "go/parser" + "go/token" + "testing" + + "github.com/stretchr/testify/require" +) + +// test helper functions + +// parseFuncDecl parses a function declaration from a code string +func parseFuncDecl(t *testing.T, code string) *ast.FuncDecl { + t.Helper() + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "", "package test\n"+code, 0) + require.NoError(t, err) + require.Len(t, file.Decls, 1, "expected exactly one declaration") + funcDecl, ok := file.Decls[0].(*ast.FuncDecl) + require.True(t, ok, "expected declaration to be a function") + return funcDecl +} + +// parseCallExpr parses a call expression from a code string +func parseCallExpr(t *testing.T, code string) *ast.CallExpr { + t.Helper() + expr, err := parser.ParseExpr(code) + require.NoError(t, err) + callExpr, ok := expr.(*ast.CallExpr) + require.True(t, ok, "expected expression to be a call 
expression") + return callExpr +} + +// parseCompositeLit parses a composite literal from a code string +func parseCompositeLit(t *testing.T, code string) *ast.CompositeLit { + t.Helper() + expr, err := parser.ParseExpr(code) + require.NoError(t, err) + lit, ok := expr.(*ast.CompositeLit) + require.True(t, ok, "expected expression to be a composite literal") + return lit +} + +// parseConstDecl parses a const declaration from a code string and returns the GenDecl +func parseConstDecl(t *testing.T, code string) *ast.GenDecl { + t.Helper() + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "", "package test\n"+code, 0) + require.NoError(t, err) + require.Len(t, file.Decls, 1, "expected exactly one declaration") + genDecl, ok := file.Decls[0].(*ast.GenDecl) + require.True(t, ok, "expected declaration to be a general declaration") + return genDecl +} + +func TestReturnsPackageCataloger(t *testing.T) { + tests := []struct { + name string + code string + want bool + }{ + { + name: "returns pkg.Cataloger", + code: `func NewFoo() pkg.Cataloger { return nil }`, + want: true, + }, + { + name: "returns bare Cataloger", + code: `func NewFoo() Cataloger { return nil }`, + want: true, + }, + { + name: "returns multiple values", + code: `func NewFoo() (pkg.Cataloger, error) { return nil, nil }`, + want: false, + }, + { + name: "returns error", + code: `func NewFoo() error { return nil }`, + want: false, + }, + { + name: "returns pointer to Cataloger", + code: `func NewFoo() *pkg.Cataloger { return nil }`, + want: false, + }, + { + name: "returns string", + code: `func NewFoo() string { return "" }`, + want: false, + }, + { + name: "no return type", + code: `func NewFoo() { }`, + want: false, + }, + { + name: "returns wrong package Cataloger", + code: `func NewFoo() other.Cataloger { return nil }`, + want: false, + }, + { + name: "returns pkg.OtherType", + code: `func NewFoo() pkg.OtherType { return nil }`, + want: false, + }, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + funcDecl := parseFuncDecl(t, tt.code) + got := returnsPackageCataloger(funcDecl) + require.Equal(t, tt.want, got) + }) + } +} + +func TestIsGenericNewCatalogerCall(t *testing.T) { + tests := []struct { + name string + code string + want bool + }{ + { + name: "generic.NewCataloger call", + code: `generic.NewCataloger("foo")`, + want: true, + }, + { + name: "generic.NewCataloger with no args", + code: `generic.NewCataloger()`, + want: true, + }, + { + name: "other.NewCataloger call", + code: `other.NewCataloger("foo")`, + want: false, + }, + { + name: "generic.OtherMethod call", + code: `generic.OtherMethod("foo")`, + want: false, + }, + { + name: "bare NewCataloger call", + code: `NewCataloger("foo")`, + want: false, + }, + { + name: "nested call", + code: `foo(generic.NewCataloger("bar"))`, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + callExpr := parseCallExpr(t, tt.code) + got := isGenericNewCatalogerCall(callExpr) + require.Equal(t, tt.want, got) + }) + } +} + +func TestExtractStringSliceFromExpr(t *testing.T) { + tests := []struct { + name string + code string + want []string + }{ + { + name: "strset.New with strings", + code: `strset.New([]string{"foo", "bar", "baz"})`, + want: []string{"foo", "bar", "baz"}, + }, + { + name: "strset.New with single string", + code: `strset.New([]string{"single"})`, + want: []string{"single"}, + }, + { + name: "strset.New with empty slice", + code: `strset.New([]string{})`, + want: nil, + }, + { + name: "other.New with strings", + code: `other.New([]string{"x", "y"})`, + want: []string{"x", "y"}, + }, + { + name: "call with no args", + code: `strset.New()`, + want: nil, + }, + { + name: "call with non-composite-literal arg", + code: `strset.New("not a slice")`, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + callExpr := parseCallExpr(t, tt.code) + got := extractStringSliceFromExpr(callExpr) + 
require.Equal(t, tt.want, got) + }) + } +} + +func TestSearchConstInDecl(t *testing.T) { + tests := []struct { + name string + code string + constName string + want string + }{ + { + name: "single const", + code: `const Foo = "bar"`, + constName: "Foo", + want: "bar", + }, + { + name: "grouped consts - first", + code: `const ( + Foo = "bar" + Baz = "qux" + )`, + constName: "Foo", + want: "bar", + }, + { + name: "grouped consts - second", + code: `const ( + Foo = "bar" + Baz = "qux" + )`, + constName: "Baz", + want: "qux", + }, + { + name: "const not found", + code: `const Foo = "bar"`, + constName: "Missing", + want: "", + }, + { + name: "var declaration instead of const", + code: `var Foo = "bar"`, + constName: "Foo", + want: "", + }, + { + name: "const with non-string value", + code: `const Foo = 42`, + constName: "Foo", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + genDecl := parseConstDecl(t, tt.code) + got := searchConstInDecl(genDecl, tt.constName) + require.Equal(t, tt.want, got) + }) + } +} + +func TestGetConstValue(t *testing.T) { + tests := []struct { + name string + code string + constName string + want string + }{ + { + name: "single const match", + code: `const Foo = "bar"`, + constName: "Foo", + want: "bar", + }, + { + name: "no match", + code: `const Foo = "bar"`, + constName: "NotFoo", + want: "", + }, + { + name: "non-string literal", + code: `const Foo = 123`, + constName: "Foo", + want: "", + }, + { + name: "const with complex value", + code: `const Foo = Bar + "suffix"`, + constName: "Foo", + want: "", + }, + { + name: "first of multiple in same spec", + code: `const Foo, Bar = "baz", "qux"`, + constName: "Foo", + want: "baz", + }, + { + name: "second of multiple in same spec", + code: `const Foo, Bar = "baz", "qux"`, + constName: "Bar", + want: "qux", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + genDecl := parseConstDecl(t, tt.code) + require.Equal(t, token.CONST, 
genDecl.Tok) + require.NotEmpty(t, genDecl.Specs) + + // getConstValue works on a single ValueSpec, so we need to find the right one + // in case of grouped constants, each const is its own spec + var got string + for _, spec := range genDecl.Specs { + valueSpec, ok := spec.(*ast.ValueSpec) + require.True(t, ok) + + got = getConstValue(valueSpec, tt.constName) + if got != "" { + break + } + } + + require.Equal(t, tt.want, got) + }) + } +} + +func TestResolveImportPath(t *testing.T) { + const testRepoRoot = "/repo/root" + + tests := []struct { + name string + importPath string + want string + }{ + { + name: "syft pkg cataloger golang", + importPath: "github.com/anchore/syft/syft/pkg/cataloger/golang", + want: "/repo/root/syft/pkg/cataloger/golang", + }, + { + name: "syft internal capabilities", + importPath: "github.com/anchore/syft/internal/capabilities", + want: "/repo/root/internal/capabilities", + }, + { + name: "syft root package", + importPath: "github.com/anchore/syft/syft", + want: "/repo/root/syft", + }, + { + name: "external package", + importPath: "github.com/other/repo/pkg", + want: "", + }, + { + name: "standard library", + importPath: "fmt", + want: "", + }, + { + name: "empty import path", + importPath: "", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := resolveImportPath(tt.importPath, testRepoRoot) + require.Equal(t, tt.want, got) + }) + } +} diff --git a/internal/capabilities/generate/metadata_discovery.go b/internal/capabilities/generate/discover_metadata.go similarity index 98% rename from internal/capabilities/generate/metadata_discovery.go rename to internal/capabilities/generate/discover_metadata.go index 894501e69..759e6f7ce 100644 --- a/internal/capabilities/generate/metadata_discovery.go +++ b/internal/capabilities/generate/discover_metadata.go @@ -1,3 +1,4 @@ +// this file discovers metadata and package types by reading test-observations.json files generated by pkgtest helpers during test 
execution. package main import ( diff --git a/internal/capabilities/generate/discover_metadata_test.go b/internal/capabilities/generate/discover_metadata_test.go new file mode 100644 index 000000000..26805943c --- /dev/null +++ b/internal/capabilities/generate/discover_metadata_test.go @@ -0,0 +1,320 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestApplyParserObservations(t *testing.T) { + tests := []struct { + name string + cataloger DiscoveredCataloger + index *TestObservationIndex + wantFoundData bool + wantMetadataType string + wantPackageType string + }{ + { + name: "parser observations applied to matching parser", + cataloger: DiscoveredCataloger{ + Name: "test-cataloger", + PackageName: "testpkg", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseTestFile"}, + }, + }, + index: func() *TestObservationIndex { + idx := newTestObservationIndex() + idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{ + MetadataTypes: []string{"pkg.TestMetadata"}, + PackageTypes: []string{"test-type"}, + }) + return idx + }(), + wantFoundData: true, + wantMetadataType: "pkg.TestMetadata", + wantPackageType: "test-type", + }, + { + name: "no observations found for parser", + cataloger: DiscoveredCataloger{ + Name: "test-cataloger", + PackageName: "testpkg", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseOtherFile"}, + }, + }, + index: func() *TestObservationIndex { + idx := newTestObservationIndex() + idx.setParserObservations("testpkg", "parseTestFile", &TypeObservation{ + MetadataTypes: []string{"pkg.TestMetadata"}, + }) + return idx + }(), + wantFoundData: false, + }, + { + name: "multiple parsers with mixed observations", + cataloger: DiscoveredCataloger{ + Name: "test-cataloger", + PackageName: "testpkg", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseFirst"}, + {ParserFunction: "parseSecond"}, + }, + }, + index: func() *TestObservationIndex { + idx := newTestObservationIndex() + 
idx.setParserObservations("testpkg", "parseFirst", &TypeObservation{ + MetadataTypes: []string{"pkg.FirstMetadata"}, + }) + // parseSecond has no observations + return idx + }(), + wantFoundData: true, + wantMetadataType: "pkg.FirstMetadata", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotFoundData := applyParserObservations(&tt.cataloger, tt.index) + require.Equal(t, tt.wantFoundData, gotFoundData) + + if tt.wantFoundData && tt.wantMetadataType != "" { + require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, tt.wantMetadataType) + } + + if tt.wantFoundData && tt.wantPackageType != "" { + require.Contains(t, tt.cataloger.Parsers[0].PackageTypes, tt.wantPackageType) + } + }) + } +} + +func TestApplySingleParserCatalogerObservations(t *testing.T) { + tests := []struct { + name string + cataloger DiscoveredCataloger + catalogerObs *TypeObservation + wantFoundData bool + wantMetadataType []string + wantPackageType []string + }{ + { + name: "cataloger-level observations applied to single parser", + cataloger: DiscoveredCataloger{ + Name: "single-parser-cataloger", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseSingle"}, + }, + }, + catalogerObs: &TypeObservation{ + MetadataTypes: []string{"pkg.CatalogerMetadata"}, + PackageTypes: []string{"cataloger-type"}, + }, + wantFoundData: true, + wantMetadataType: []string{"pkg.CatalogerMetadata"}, + wantPackageType: []string{"cataloger-type"}, + }, + { + name: "cataloger-level merges with existing parser-level observations", + cataloger: DiscoveredCataloger{ + Name: "single-parser-cataloger", + Parsers: []DiscoveredParser{ + { + ParserFunction: "parseSingle", + MetadataTypes: []string{"pkg.ParserMetadata"}, + PackageTypes: []string{"parser-type"}, + }, + }, + }, + catalogerObs: &TypeObservation{ + MetadataTypes: []string{"pkg.CatalogerMetadata"}, + PackageTypes: []string{"cataloger-type"}, + }, + wantFoundData: true, + wantMetadataType: []string{"pkg.CatalogerMetadata", 
"pkg.ParserMetadata"}, + wantPackageType: []string{"cataloger-type", "parser-type"}, + }, + { + name: "empty cataloger observations", + cataloger: DiscoveredCataloger{ + Name: "single-parser-cataloger", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseSingle"}, + }, + }, + catalogerObs: &TypeObservation{}, + wantFoundData: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotFoundData := applySingleParserCatalogerObservations(&tt.cataloger, tt.catalogerObs) + require.Equal(t, tt.wantFoundData, gotFoundData) + + if tt.wantFoundData { + if len(tt.wantMetadataType) > 0 { + require.ElementsMatch(t, tt.wantMetadataType, tt.cataloger.Parsers[0].MetadataTypes) + } + if len(tt.wantPackageType) > 0 { + require.ElementsMatch(t, tt.wantPackageType, tt.cataloger.Parsers[0].PackageTypes) + } + } + }) + } +} + +func TestApplyMultiParserCatalogerObservations(t *testing.T) { + tests := []struct { + name string + cataloger DiscoveredCataloger + catalogerObs *TypeObservation + wantFoundData bool + // expectations for each parser by index + wantParser0HasMetadata bool + wantParser1HasMetadata bool + }{ + { + name: "all parsers without data - cataloger-level applied to all", + cataloger: DiscoveredCataloger{ + Name: "multi-parser-cataloger", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseFirst"}, + {ParserFunction: "parseSecond"}, + }, + }, + catalogerObs: &TypeObservation{ + MetadataTypes: []string{"pkg.SharedMetadata"}, + PackageTypes: []string{"shared-type"}, + }, + wantFoundData: true, + wantParser0HasMetadata: true, + wantParser1HasMetadata: true, + }, + { + name: "some parsers have data - cataloger-level only fills gaps", + cataloger: DiscoveredCataloger{ + Name: "multi-parser-cataloger", + Parsers: []DiscoveredParser{ + { + ParserFunction: "parseFirst", + MetadataTypes: []string{"pkg.FirstMetadata"}, + }, + {ParserFunction: "parseSecond"}, // no data + }, + }, + catalogerObs: &TypeObservation{ + MetadataTypes: 
[]string{"pkg.SharedMetadata"}, + }, + wantFoundData: true, + wantParser0HasMetadata: false, // already has data, not overwritten + wantParser1HasMetadata: true, // gets cataloger-level data + }, + { + name: "all parsers have data - cataloger-level not applied", + cataloger: DiscoveredCataloger{ + Name: "multi-parser-cataloger", + Parsers: []DiscoveredParser{ + { + ParserFunction: "parseFirst", + MetadataTypes: []string{"pkg.FirstMetadata"}, + }, + { + ParserFunction: "parseSecond", + MetadataTypes: []string{"pkg.SecondMetadata"}, + }, + }, + }, + catalogerObs: &TypeObservation{ + MetadataTypes: []string{"pkg.SharedMetadata"}, + }, + wantFoundData: false, + wantParser0HasMetadata: false, // should not have shared metadata + wantParser1HasMetadata: false, // should not have shared metadata + }, + { + name: "empty cataloger observations", + cataloger: DiscoveredCataloger{ + Name: "multi-parser-cataloger", + Parsers: []DiscoveredParser{ + {ParserFunction: "parseFirst"}, + {ParserFunction: "parseSecond"}, + }, + }, + catalogerObs: &TypeObservation{}, + wantFoundData: false, + wantParser0HasMetadata: false, + wantParser1HasMetadata: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotFoundData := applyMultiParserCatalogerObservations(&tt.cataloger, tt.catalogerObs) + require.Equal(t, tt.wantFoundData, gotFoundData) + + if tt.wantParser0HasMetadata { + require.Contains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata", + "parser 0 should have shared metadata") + } else if len(tt.catalogerObs.MetadataTypes) > 0 { + // if cataloger has metadata but we don't expect it in parser 0, verify it's not there + require.NotContains(t, tt.cataloger.Parsers[0].MetadataTypes, "pkg.SharedMetadata", + "parser 0 should not have shared metadata") + } + + if tt.wantParser1HasMetadata { + require.Contains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata", + "parser 1 should have shared metadata") + } else if 
len(tt.catalogerObs.MetadataTypes) > 0 { + // if cataloger has metadata but we don't expect it in parser 1, verify it's not there + require.NotContains(t, tt.cataloger.Parsers[1].MetadataTypes, "pkg.SharedMetadata", + "parser 1 should not have shared metadata") + } + }) + } +} + +func TestMergeAndDeduplicateStrings(t *testing.T) { + tests := []struct { + name string + existing []string + additional []string + want []string + }{ + { + name: "merge with duplicates", + existing: []string{"a", "b"}, + additional: []string{"b", "c"}, + want: []string{"a", "b", "c"}, + }, + { + name: "empty existing", + existing: []string{}, + additional: []string{"a", "b"}, + want: []string{"a", "b"}, + }, + { + name: "empty additional", + existing: []string{"a", "b"}, + additional: []string{}, + want: []string{"a", "b"}, + }, + { + name: "both empty", + existing: []string{}, + additional: []string{}, + want: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := mergeAndDeduplicateStrings(tt.existing, tt.additional) + require.ElementsMatch(t, tt.want, got) + }) + } +} diff --git a/internal/capabilities/generate/io.go b/internal/capabilities/generate/io.go index 8b96e55f6..005c5ba0a 100644 --- a/internal/capabilities/generate/io.go +++ b/internal/capabilities/generate/io.go @@ -1,3 +1,4 @@ +// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration. 
package main import ( diff --git a/internal/capabilities/generate/io_test.go b/internal/capabilities/generate/io_test.go new file mode 100644 index 000000000..8375647e9 --- /dev/null +++ b/internal/capabilities/generate/io_test.go @@ -0,0 +1,553 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" +) + +func TestFindSectionNode(t *testing.T) { + tests := []struct { + name string + yamlContent string + sectionName string + wantFound bool + wantValue string // expected value for scalar nodes + }{ + { + name: "finds existing configs section", + yamlContent: ` +configs: + key: value +catalogers: + - name: test +`, + sectionName: "configs", + wantFound: true, + }, + { + name: "finds existing catalogers section", + yamlContent: ` +configs: + key: value +catalogers: + - name: test +`, + sectionName: "catalogers", + wantFound: true, + }, + { + name: "returns nil for non-existent section", + yamlContent: ` +configs: + key: value +`, + sectionName: "nonexistent", + wantFound: false, + }, + { + name: "handles empty mapping", + yamlContent: `{}`, + sectionName: "any", + wantFound: false, + }, + { + name: "finds section with scalar value", + yamlContent: ` +name: test-cataloger +type: custom +`, + sectionName: "name", + wantFound: true, + wantValue: "test-cataloger", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode) + require.NoError(t, err) + + // get the mapping node + var mappingNode *yaml.Node + if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { + mappingNode = rootNode.Content[0] + } else { + mappingNode = &rootNode + } + + got := findSectionNode(mappingNode, tt.sectionName) + + if tt.wantFound { + require.NotNil(t, got) + if tt.wantValue != "" { + require.Equal(t, tt.wantValue, got.Value) + } + } else { + require.Nil(t, got) + } + }) + } +} + +func TestFindFieldValue(t *testing.T) { + tests := 
[]struct { + name string + yamlContent string + fieldName string + want string + }{ + { + name: "finds simple string field", + yamlContent: ` +name: test-cataloger +type: custom +`, + fieldName: "name", + want: "test-cataloger", + }, + { + name: "finds type field", + yamlContent: ` +name: test-cataloger +type: generic +`, + fieldName: "type", + want: "generic", + }, + { + name: "returns empty for non-existent field", + yamlContent: ` +name: test-cataloger +`, + fieldName: "nonexistent", + want: "", + }, + { + name: "finds parser_function field", + yamlContent: ` +parser_function: parseGoMod +metadata_types: [GoModMetadata] +`, + fieldName: "parser_function", + want: "parseGoMod", + }, + { + name: "handles empty mapping", + yamlContent: `{}`, + fieldName: "any", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(tt.yamlContent), &rootNode) + require.NoError(t, err) + + // get the mapping node + var mappingNode *yaml.Node + if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { + mappingNode = rootNode.Content[0] + } else { + mappingNode = &rootNode + } + + got := findFieldValue(mappingNode, tt.fieldName) + require.Equal(t, tt.want, got) + }) + } +} + +func TestPreserveMappingNodeComments(t *testing.T) { + tests := []struct { + name string + checkField string + wantHeadComment string + wantLineComment string + }{ + { + name: "preserves line comment on field", + checkField: "name", + wantLineComment: "AUTO-GENERATED", + }, + { + name: "preserves head comment on field", + checkField: "type", + wantHeadComment: "Important field", + wantLineComment: "AUTO-GENERATED", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // manually construct nodes with comments + existingMapping := &yaml.Node{ + Kind: yaml.MappingNode, + Content: []*yaml.Node{ + {Kind: yaml.ScalarNode, Value: "name", LineComment: "AUTO-GENERATED"}, + {Kind: 
yaml.ScalarNode, Value: "test", HeadComment: "value comment"}, + {Kind: yaml.ScalarNode, Value: "type", HeadComment: "Important field", LineComment: "AUTO-GENERATED"}, + {Kind: yaml.ScalarNode, Value: "custom"}, + }, + } + + newMapping := &yaml.Node{ + Kind: yaml.MappingNode, + Content: []*yaml.Node{ + {Kind: yaml.ScalarNode, Value: "name"}, + {Kind: yaml.ScalarNode, Value: "test-new"}, + {Kind: yaml.ScalarNode, Value: "type"}, + {Kind: yaml.ScalarNode, Value: "generic"}, + }, + } + + preserveMappingNodeComments(existingMapping, newMapping) + + // find the field we're checking + keyNode, valueNode := findFieldNodes(newMapping, tt.checkField) + require.NotNil(t, keyNode, "field %s not found", tt.checkField) + + // check comments were preserved + if tt.wantHeadComment != "" { + require.Equal(t, tt.wantHeadComment, keyNode.HeadComment) + } + if tt.wantLineComment != "" { + require.Equal(t, tt.wantLineComment, keyNode.LineComment) + } + + // verify that value node comments are also preserved + if tt.checkField == "name" { + require.Equal(t, "value comment", valueNode.HeadComment) + } + }) + } +} + +func TestPreserveSequenceNodeComments(t *testing.T) { + tests := []struct { + name string + existingYAML string + newYAML string + wantHeadComment string + }{ + { + name: "preserves parser comments by parser_function", + existingYAML: ` +- parser_function: parseGoMod # old parser + metadata_types: [GoModMetadata] +- parser_function: parseGoSum + metadata_types: [GoSumMetadata] +`, + newYAML: ` +- parser_function: parseGoMod + metadata_types: [GoModMetadataNew] +- parser_function: parseGoSum + metadata_types: [GoSumMetadataNew] +`, + // we'll verify in the test body that comments are preserved + }, + { + name: "handles new parsers not in existing", + existingYAML: ` +- parser_function: parseGoMod + metadata_types: [GoModMetadata] +`, + newYAML: ` +- parser_function: parseGoMod + metadata_types: [GoModMetadata] +- parser_function: parseGoSum + metadata_types: [GoSumMetadata] 
+`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var existingNode, newNode yaml.Node + err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode) + require.NoError(t, err) + err = yaml.Unmarshal([]byte(tt.newYAML), &newNode) + require.NoError(t, err) + + // get sequence nodes + existingSeq := getSequenceNode(&existingNode) + newSeq := getSequenceNode(&newNode) + + preserveSequenceNodeComments(existingSeq, newSeq) + + // verify that the function ran without panicking + require.NotNil(t, newSeq) + }) + } +} + +func TestPreserveFieldComments(t *testing.T) { + tests := []struct { + name string + existingYAML string + newYAML string + wantPreserve bool + }{ + { + name: "preserves mapping node comments", + existingYAML: ` +name: test # AUTO-GENERATED +type: custom +`, + newYAML: ` +name: test-new +type: custom +`, + wantPreserve: true, + }, + { + name: "handles kind mismatch gracefully", + existingYAML: ` +- item1 +- item2 +`, + newYAML: ` +name: test +`, + wantPreserve: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var existingNode, newNode yaml.Node + err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode) + require.NoError(t, err) + err = yaml.Unmarshal([]byte(tt.newYAML), &newNode) + require.NoError(t, err) + + existingContent := getContentNode(&existingNode) + newContent := getContentNode(&newNode) + + preserveFieldComments(existingContent, newContent) + + // verify the function completed without panicking + require.NotNil(t, newContent) + }) + } +} + +func TestUpdateOrAddSection(t *testing.T) { + tests := []struct { + name string + existingYAML string + newYAML string + sectionName string + wantUpdated bool + wantAdded bool + }{ + { + name: "updates existing section", + existingYAML: ` +configs: + old: value +catalogers: + - name: test +`, + newYAML: ` +configs: + new: value +`, + sectionName: "configs", + wantUpdated: true, + }, + { + name: "adds new section", + existingYAML: ` 
+catalogers: + - name: test +`, + newYAML: ` +configs: + new: value +`, + sectionName: "configs", + wantAdded: true, + }, + { + name: "handles application section", + existingYAML: ` +catalogers: + - name: test +`, + newYAML: ` +application: + key: value +`, + sectionName: "application", + wantAdded: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var existingNode, newNode yaml.Node + err := yaml.Unmarshal([]byte(tt.existingYAML), &existingNode) + require.NoError(t, err) + err = yaml.Unmarshal([]byte(tt.newYAML), &newNode) + require.NoError(t, err) + + existingMapping := getMappingNode(&existingNode) + newMapping := getMappingNode(&newNode) + + updateOrAddSection(existingMapping, newMapping, tt.sectionName) + + // verify the section exists in the result + resultSection := findSectionNode(existingMapping, tt.sectionName) + require.NotNil(t, resultSection, "section %s should exist after update", tt.sectionName) + }) + } +} + +func TestAddCatalogerFieldComment(t *testing.T) { + tests := []struct { + name string + fieldName string + fieldValue string + catalogerName string + wantLineComment string + }{ + { + name: "ecosystem is MANUAL", + fieldName: "ecosystem", + catalogerName: "test-cataloger", + wantLineComment: "MANUAL", + }, + { + name: "name is AUTO-GENERATED", + fieldName: "name", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "type is AUTO-GENERATED", + fieldName: "type", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "source is AUTO-GENERATED", + fieldName: "source", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "config is AUTO-GENERATED", + fieldName: "config", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "selectors is AUTO-GENERATED", + fieldName: "selectors", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + 
}, + { + name: "parsers is AUTO-GENERATED structure", + fieldName: "parsers", + catalogerName: "test-cataloger", + wantLineComment: "AUTO-GENERATED structure", + }, + { + name: "detectors for binary-classifier-cataloger is AUTO-GENERATED", + fieldName: "detectors", + catalogerName: "binary-classifier-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "detectors for other catalogers is MANUAL", + fieldName: "detectors", + catalogerName: "java-archive-cataloger", + wantLineComment: "MANUAL - edit detectors here", + }, + { + name: "metadata_types is AUTO-GENERATED", + fieldName: "metadata_types", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "package_types is AUTO-GENERATED", + fieldName: "package_types", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "json_schema_types is AUTO-GENERATED", + fieldName: "json_schema_types", + catalogerName: "test-cataloger", + wantLineComment: autoGeneratedComment, + }, + { + name: "capabilities is MANUAL", + fieldName: "capabilities", + catalogerName: "test-cataloger", + wantLineComment: "MANUAL - edit capabilities here", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // create key and value nodes + keyNode := &yaml.Node{ + Kind: yaml.ScalarNode, + Value: tt.fieldName, + } + valueNode := &yaml.Node{ + Kind: yaml.ScalarNode, + Value: tt.fieldValue, + } + + addCatalogerFieldComment(keyNode, valueNode, tt.catalogerName) + + require.Equal(t, tt.wantLineComment, keyNode.LineComment) + }) + } +} + +// helper functions + +func getMappingNode(node *yaml.Node) *yaml.Node { + if node.Kind == yaml.DocumentNode && len(node.Content) > 0 { + return node.Content[0] + } + return node +} + +func getSequenceNode(node *yaml.Node) *yaml.Node { + if node.Kind == yaml.DocumentNode && len(node.Content) > 0 { + return node.Content[0] + } + return node +} + +func getContentNode(node *yaml.Node) *yaml.Node { + if 
node.Kind == yaml.DocumentNode && len(node.Content) > 0 { + return node.Content[0] + } + return node +} + +func findFieldNodes(mappingNode *yaml.Node, fieldName string) (*yaml.Node, *yaml.Node) { + if mappingNode.Kind != yaml.MappingNode { + return nil, nil + } + + for i := 0; i < len(mappingNode.Content); i += 2 { + if mappingNode.Content[i].Value == fieldName { + return mappingNode.Content[i], mappingNode.Content[i+1] + } + } + + return nil, nil +} diff --git a/internal/capabilities/generate/main.go b/internal/capabilities/generate/main.go index 6c5ec8605..eed4fca9c 100644 --- a/internal/capabilities/generate/main.go +++ b/internal/capabilities/generate/main.go @@ -1,3 +1,4 @@ +// this is the entry point for regenerating the packages.yaml file, which orchestrates discovery, merging, and validation of cataloger capabilities. package main import ( @@ -19,25 +20,6 @@ var ( dimStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("245")) // lighter grey (256-color) ) -func printSuccessASCII() { - fmt.Println() - fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉") - fmt.Println() - fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀")) - fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█")) - fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀")) - fmt.Println() -} - -func printFailureASCII() { - fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢") - fmt.Println() - fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄")) - fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█")) - fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░")) - fmt.Println() -} - func main() { repoRoot, err := RepoRoot() if err != nil { @@ -147,3 +129,22 @@ func hasEmptyCapabilities(caps capabilities.CapabilitySet) bool { // if someone filled out the capabilities section (even with all false/empty values), that's intentional return len(caps) == 0 } + +func printSuccessASCII() { + fmt.Println() + 
fmt.Println(successStyle.Render("✓ All validations passed!") + " 🎉") + fmt.Println() + fmt.Println(successStyle.Render(" ░█▀▀░█░█░█▀▀░█▀▀░█▀▀░█▀▀░█▀▀")) + fmt.Println(successStyle.Render(" ░▀▀█░█░█░█░░░█░░░█▀▀░▀▀█░▀▀█")) + fmt.Println(successStyle.Render(" ░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀░▀▀▀")) + fmt.Println() +} + +func printFailureASCII() { + fmt.Println(errorStyle.Render("✗ Validation failed") + " 😢") + fmt.Println() + fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░▀█▀░█░░░█▀▀░█▀▄")) + fmt.Println(errorStyle.Render(" ░█▀▀░█▀█░░█░░█░░░█▀▀░█░█")) + fmt.Println(errorStyle.Render(" ░▀░░░▀░▀░▀▀▀░▀▀▀░▀▀▀░▀▀░")) + fmt.Println() +} diff --git a/internal/capabilities/generate/merge.go b/internal/capabilities/generate/merge.go index d17a32308..cc9cfe906 100644 --- a/internal/capabilities/generate/merge.go +++ b/internal/capabilities/generate/merge.go @@ -1,3 +1,4 @@ +// this file contains the core merging logic that combines discovered cataloger data with existing packages.yaml, preserving all manual sections while updating auto-generated fields. package main import ( @@ -49,6 +50,74 @@ var catalogerConfigOverrides = map[string]string{ "nix-store-cataloger": "nix.Config", } +// ecosystemMapping maps patterns in cataloger names to ecosystem names. +// order matters - more specific patterns should come first. +type ecosystemMapping struct { + patterns []string // patterns to match in the cataloger name + ecosystem string // ecosystem to return if any pattern matches +} + +// ecosystemMappings defines the pattern-to-ecosystem mappings. 
+// note: order matters - check more specific patterns first +var ecosystemMappings = []ecosystemMapping{ + // language-based ecosystems + {[]string{"rust", "cargo"}, "rust"}, + {[]string{"javascript", "node", "npm"}, "javascript"}, + {[]string{"python"}, "python"}, + {[]string{"java", "graalvm"}, "java"}, + {[]string{"go-module", "golang"}, "go"}, + {[]string{"ruby", "gem"}, "ruby"}, + {[]string{"php", "composer", "pear", "pecl"}, "php"}, + {[]string{"dotnet", ".net", "csharp"}, "dotnet"}, + {[]string{"swift", "cocoapods"}, "swift"}, + {[]string{"dart", "pubspec"}, "dart"}, + {[]string{"elixir", "mix"}, "elixir"}, + {[]string{"erlang", "rebar"}, "erlang"}, + {[]string{"haskell", "cabal", "stack"}, "haskell"}, + {[]string{"lua"}, "lua"}, + {[]string{"ocaml", "opam"}, "ocaml"}, + {[]string{"r-package"}, "r"}, + {[]string{"swipl", "prolog"}, "prolog"}, + {[]string{"cpp", "conan"}, "c++"}, + {[]string{"kotlin"}, "kotlin"}, + + // os/distro-based ecosystems + {[]string{"apk", "alpine"}, "alpine"}, + {[]string{"dpkg", "deb", "debian"}, "debian"}, + {[]string{"rpm", "redhat"}, "rpm"}, + {[]string{"alpm", "arch"}, "arch"}, + {[]string{"portage", "gentoo"}, "gentoo"}, + {[]string{"homebrew"}, "homebrew"}, + {[]string{"snap"}, "snap"}, + + // other ecosystems + {[]string{"binary", "elf", "pe-binary"}, "binary"}, + {[]string{"conda"}, "conda"}, + {[]string{"nix"}, "nix"}, + {[]string{"kernel"}, "linux"}, + {[]string{"bitnami"}, "bitnami"}, + {[]string{"terraform"}, "terraform"}, + {[]string{"github"}, "github-actions"}, + {[]string{"wordpress"}, "wordpress"}, + {[]string{"sbom"}, "sbom"}, +} + +// inferEcosystem attempts to determine the ecosystem from a cataloger name +func inferEcosystem(catalogerName string) string { + name := strings.ToLower(catalogerName) + + for _, mapping := range ecosystemMappings { + for _, pattern := range mapping.patterns { + if strings.Contains(name, pattern) { + return mapping.ecosystem + } + } + } + + // default + return "other" +} + // 
Statistics contains information about the regeneration process type Statistics struct { TotalGenericCatalogers int @@ -813,71 +882,3 @@ func formatOrphans(orphans []orphanInfo) string { } return strings.Join(lines, "\n") } - -// ecosystemMapping maps patterns in cataloger names to ecosystem names. -// order matters - more specific patterns should come first. -type ecosystemMapping struct { - patterns []string // patterns to match in the cataloger name - ecosystem string // ecosystem to return if any pattern matches -} - -// ecosystemMappings defines the pattern-to-ecosystem mappings. -// note: order matters - check more specific patterns first -var ecosystemMappings = []ecosystemMapping{ - // language-based ecosystems - {[]string{"rust", "cargo"}, "rust"}, - {[]string{"javascript", "node", "npm"}, "javascript"}, - {[]string{"python"}, "python"}, - {[]string{"java", "graalvm"}, "java"}, - {[]string{"go-module", "golang"}, "go"}, - {[]string{"ruby", "gem"}, "ruby"}, - {[]string{"php", "composer", "pear", "pecl"}, "php"}, - {[]string{"dotnet", ".net", "csharp"}, "dotnet"}, - {[]string{"swift", "cocoapods"}, "swift"}, - {[]string{"dart", "pubspec"}, "dart"}, - {[]string{"elixir", "mix"}, "elixir"}, - {[]string{"erlang", "rebar"}, "erlang"}, - {[]string{"haskell", "cabal", "stack"}, "haskell"}, - {[]string{"lua"}, "lua"}, - {[]string{"ocaml", "opam"}, "ocaml"}, - {[]string{"r-package"}, "r"}, - {[]string{"swipl", "prolog"}, "prolog"}, - {[]string{"cpp", "conan"}, "c++"}, - {[]string{"kotlin"}, "kotlin"}, - - // os/distro-based ecosystems - {[]string{"apk", "alpine"}, "alpine"}, - {[]string{"dpkg", "deb", "debian"}, "debian"}, - {[]string{"rpm", "redhat"}, "rpm"}, - {[]string{"alpm", "arch"}, "arch"}, - {[]string{"portage", "gentoo"}, "gentoo"}, - {[]string{"homebrew"}, "homebrew"}, - {[]string{"snap"}, "snap"}, - - // other ecosystems - {[]string{"binary", "elf", "pe-binary"}, "binary"}, - {[]string{"conda"}, "conda"}, - {[]string{"nix"}, "nix"}, - {[]string{"kernel"}, 
"linux"}, - {[]string{"bitnami"}, "bitnami"}, - {[]string{"terraform"}, "terraform"}, - {[]string{"github"}, "github-actions"}, - {[]string{"wordpress"}, "wordpress"}, - {[]string{"sbom"}, "sbom"}, -} - -// inferEcosystem attempts to determine the ecosystem from a cataloger name -func inferEcosystem(catalogerName string) string { - name := strings.ToLower(catalogerName) - - for _, mapping := range ecosystemMappings { - for _, pattern := range mapping.patterns { - if strings.Contains(name, pattern) { - return mapping.ecosystem - } - } - } - - // default - return "other" -} diff --git a/internal/capabilities/generate/merge_test.go b/internal/capabilities/generate/merge_test.go index e0d8bc3fe..0ec32828e 100644 --- a/internal/capabilities/generate/merge_test.go +++ b/internal/capabilities/generate/merge_test.go @@ -374,3 +374,153 @@ func TestCatalogerConfigFieldUpdatedForNewCatalogers(t *testing.T) { }) } } + +func TestStripPURLVersion(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "purl with version", + input: "pkg:generic/python@1.0.0", + want: "pkg:generic/python", + }, + { + name: "purl without version", + input: "pkg:generic/python", + want: "pkg:generic/python", + }, + { + name: "purl with multiple @ signs", + input: "pkg:generic/py@thon@1.0.0", + want: "pkg:generic/py@thon", + }, + { + name: "empty string", + input: "", + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := stripPURLVersion(tt.input) + require.Equal(t, tt.want, got) + }) + } +} + +func TestInferEcosystem(t *testing.T) { + tests := []struct { + name string + catalogerName string + want string + }{ + { + name: "go module cataloger", + catalogerName: "go-module-binary-cataloger", + want: "go", + }, + { + name: "python cataloger", + catalogerName: "python-package-cataloger", + want: "python", + }, + { + name: "java archive cataloger", + catalogerName: "java-archive-cataloger", + want: "java", + }, + { + 
name: "rust cargo cataloger", + catalogerName: "rust-cargo-lock-cataloger", + want: "rust", + }, + { + name: "javascript npm cataloger", + catalogerName: "javascript-package-cataloger", + want: "javascript", + }, + { + name: "ruby gem cataloger", + catalogerName: "ruby-gemspec-cataloger", + want: "ruby", + }, + { + name: "debian dpkg cataloger", + catalogerName: "dpkg-db-cataloger", + want: "debian", + }, + { + name: "alpine apk cataloger", + catalogerName: "apk-db-cataloger", + want: "alpine", + }, + { + name: "linux kernel cataloger", + catalogerName: "linux-kernel-cataloger", + want: "linux", + }, + { + name: "binary classifier cataloger", + catalogerName: "binary-classifier-cataloger", + want: "binary", + }, + { + name: "github actions cataloger", + catalogerName: "github-actions-usage-cataloger", + want: "github-actions", + }, + { + name: "unknown cataloger defaults to other", + catalogerName: "unknown-custom-cataloger", + want: "other", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := inferEcosystem(tt.catalogerName) + require.Equal(t, tt.want, got) + }) + } +} + +func TestConvertToJSONSchemaTypesFromMetadata(t *testing.T) { + tests := []struct { + name string + metadataTypes []string + want []string + }{ + { + name: "empty slice returns nil", + metadataTypes: []string{}, + want: nil, + }, + { + name: "nil slice returns nil", + metadataTypes: nil, + want: nil, + }, + { + name: "single metadata type", + metadataTypes: []string{"pkg.AlpmDBEntry"}, + want: []string{"AlpmDbEntry"}, + }, + { + name: "multiple metadata types", + metadataTypes: []string{"pkg.ApkDBEntry", "pkg.BinarySignature"}, + want: []string{"ApkDbEntry", "BinarySignature"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := convertToJSONSchemaTypesFromMetadata(tt.metadataTypes) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("convertToJSONSchemaTypesFromMetadata() mismatch (-want +got):\n%s", diff) + } + }) + 
} +} diff --git a/internal/capabilities/generate/metadata_check.go b/internal/capabilities/generate/metadata_check.go index 42c75bf33..0eb60cc72 100644 --- a/internal/capabilities/generate/metadata_check.go +++ b/internal/capabilities/generate/metadata_check.go @@ -1,3 +1,4 @@ +// this file validates that all known metadata and package types are documented in packages.yaml by checking coverage and reporting any missing types. package main import ( diff --git a/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go new file mode 100644 index 000000000..fd76c5cb0 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/cataloger-with-constant/python/cataloger.go @@ -0,0 +1,21 @@ +package python + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +const catalogerName = "python-package-cataloger" + +type CatalogerConfig struct { + Setting string +} + +func NewPythonCataloger(cfg CatalogerConfig) pkg.Cataloger { + return generic.NewCataloger(catalogerName). 
+ WithParserByGlobs(parse, "**/*.py") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go new file mode 100644 index 000000000..496172064 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger1.go @@ -0,0 +1,19 @@ +package duplicate + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +type Config1 struct { + Option1 bool +} + +func NewDuplicateCataloger1(cfg Config1) pkg.Cataloger { + return generic.NewCataloger("duplicate-cataloger"). + WithParserByGlobs(parse1, "**/*.txt") +} + +func parse1(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go new file mode 100644 index 000000000..0c563c99a --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/conflicting-names/duplicate/cataloger2.go @@ -0,0 +1,19 @@ +package duplicate + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +type Config2 struct { + Option2 string +} + +func NewDuplicateCataloger2(cfg Config2) pkg.Cataloger { + return generic.NewCataloger("duplicate-cataloger"). 
+ WithParserByGlobs(parse2, "**/*.json") +} + +func parse2(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go new file mode 100644 index 000000000..78d934313 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/cataloger.go @@ -0,0 +1,9 @@ +package dotnet + +import ( + "github.com/anchore/syft/syft/pkg" +) + +func NewDotnetCataloger(cfg CatalogerConfig) pkg.Cataloger { + return dotnetCataloger{cfg: cfg} +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go new file mode 100644 index 000000000..d32d318b7 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-different-file/dotnet/types.go @@ -0,0 +1,23 @@ +package dotnet + +import ( + "github.com/anchore/syft/syft/pkg" +) + +const catalogerName = "dotnet-cataloger" + +type CatalogerConfig struct { + Option bool +} + +type dotnetCataloger struct { + cfg CatalogerConfig +} + +func (d dotnetCataloger) Name() string { + return catalogerName +} + +func (d dotnetCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go new file mode 100644 index 000000000..a3530e6d7 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/custom-cataloger-same-file/java/cataloger.go @@ 
-0,0 +1,27 @@ +package java + +import ( + "github.com/anchore/syft/syft/pkg" +) + +const pomCatalogerName = "java-pom-cataloger" + +type ArchiveCatalogerConfig struct { + IncludeArchives bool +} + +type pomXMLCataloger struct { + cfg ArchiveCatalogerConfig +} + +func (p pomXMLCataloger) Name() string { + return pomCatalogerName +} + +func (p pomXMLCataloger) Catalog(resolver any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} + +func NewPomCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger { + return pomXMLCataloger{cfg: cfg} +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go new file mode 100644 index 000000000..99b181854 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/cataloger.go @@ -0,0 +1,15 @@ +package kernel + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +func NewLinuxKernelCataloger(cfg LinuxKernelCatalogerConfig) pkg.Cataloger { + return generic.NewCataloger("linux-kernel-cataloger"). 
+ WithParserByGlobs(parse, "**/vmlinuz") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go new file mode 100644 index 000000000..c77d49bff --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/imported-config-type/kernel/config.go @@ -0,0 +1,5 @@ +package kernel + +type LinuxKernelCatalogerConfig struct { + KernelVersion string +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go new file mode 100644 index 000000000..cddb73dda --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/mixed-naming-patterns/ruby/cataloger.go @@ -0,0 +1,19 @@ +package ruby + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +type Config struct { + Setting bool +} + +func NewRubyCataloger(opts Config) pkg.Cataloger { + return generic.NewCataloger("ruby-cataloger"). 
+ WithParserByGlobs(parse, "**/Gemfile") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go new file mode 100644 index 000000000..c1668a8f9 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/no-config-cataloger/javascript/cataloger.go @@ -0,0 +1,15 @@ +package javascript + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +func NewJavaScriptCataloger() pkg.Cataloger { + return generic.NewCataloger("javascript-cataloger"). + WithParserByGlobs(parse, "**/*.js") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go new file mode 100644 index 000000000..f2b32e082 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/non-config-first-param/binary/cataloger.go @@ -0,0 +1,17 @@ +package binary + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +type Parser struct{} + +func NewBinaryCataloger(parser Parser) pkg.Cataloger { + return generic.NewCataloger("binary-cataloger"). 
+ WithParserByGlobs(parse, "**/*") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go new file mode 100644 index 000000000..88a481e0c --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/selector-expression-config/rust/cataloger.go @@ -0,0 +1,16 @@ +package rust + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/test/cargo" +) + +func NewRustCataloger(cfg cargo.CatalogerConfig) pkg.Cataloger { + return generic.NewCataloger("rust-cataloger"). + WithParserByGlobs(parse, "**/Cargo.toml") +} + +func parse(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go b/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go new file mode 100644 index 000000000..47a0d7c85 --- /dev/null +++ b/internal/capabilities/generate/test-fixtures/config-linking/simple-generic-cataloger/golang/cataloger.go @@ -0,0 +1,19 @@ +package golang + +import ( + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +type CatalogerConfig struct { + SomeOption bool +} + +func NewGoModuleCataloger(cfg CatalogerConfig) pkg.Cataloger { + return generic.NewCataloger("go-module-cataloger"). 
+ WithParserByGlobs(parseGoMod, "**/go.mod") +} + +func parseGoMod(path string, reader any) ([]pkg.Package, []pkg.Relationship, error) { + return nil, nil, nil +} diff --git a/internal/capabilities/packages.yaml b/internal/capabilities/packages.yaml index 3ecf6ad9f..f74b3639c 100644 --- a/internal/capabilities/packages.yaml +++ b/internal/capabilities/packages.yaml @@ -218,7 +218,6 @@ application: # AUTO-GENERATED - application-level config keys description: enables Syft to use the network to fill in more detailed license information - key: linux-kernel.catalog-modules description: whether to catalog linux kernel modules found within lib/modules/** directories - default: true - key: nix.capture-owned-files description: enumerate all files owned by packages found within Nix store paths - key: python.guess-unpinned-requirements