Alex Goodman 0dd906b071 fix linting
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-10-28 10:38:24 -04:00

680 lines
22 KiB
Go

package main
import (
"fmt"
"os"
"gopkg.in/yaml.v3"
"github.com/anchore/syft/internal/capabilities"
)
// autoGeneratedComment is the line comment attached to YAML keys that are marked as
// auto-generated when the capabilities file is written.
const autoGeneratedComment = "AUTO-GENERATED"
// capabilitiesHeaderComment is the head comment placed on the root YAML node when the
// capabilities file is saved. It documents the file layout for people editing it by hand.
// NOTE(review): the CAPABILITIES FORMAT section shows entries keyed by 'field:' while the
// EXAMPLES section uses 'name:' — confirm which key the schema actually uses.
const capabilitiesHeaderComment = `This file is partially auto-generated. Run 'go generate ./internal/capabilities' to regenerate.
Fields marked AUTO-GENERATED will be updated during regeneration.
All 'capabilities' sections are MANUAL - edit these to describe cataloger behavior.
CAPABILITY SECTIONS:
There are two types of capability sections depending on cataloger type:
1. Generic catalogers (type=generic): Have capabilities at the PARSER level
- Each parser function has its own capabilities section
- Allows different parsers within the same cataloger to have different capabilities
2. Custom catalogers (type=custom): Have capabilities at the CATALOGER level
- Single capabilities section for the entire cataloger
CAPABILITIES FORMAT:
Capabilities use a field-based format with defaults and optional conditional overrides:
capabilities:
- field: <field-name> # dot-notation path (e.g., "license", "dependency.depth")
default: <value> # value when no conditions match
conditions: # optional - conditional overrides evaluated in order
- when: {ConfigField: val} # when these config fields match (AND logic)
value: <override-value> # use this value instead
comment: "explanation" # optional - why this condition exists
evidence: # optional - source code references
- "StructName.FieldName"
comment: "explanation" # optional - general field explanation
DETECTOR CONDITIONS:
Detectors (used by custom catalogers) can have optional conditions that control when
they are active. This allows a single cataloger to have different detection behavior
based on configuration.
Structure:
detectors:
- method: glob # AUTO-GENERATED - detection method
criteria: ["**/*.jar"] # AUTO-GENERATED - patterns to match
comment: "always active" # MANUAL - optional explanation
- method: glob
criteria: ["**/*.zip"]
conditions: # MANUAL - when this detector is active
- when: {IncludeZipFiles: true} # config fields that must match
comment: "optional explanation"
comment: "ZIP detection requires config"
Notes:
- Conditions reference fields from the cataloger's config struct
- Multiple conditions in the array use OR logic (any condition can activate)
- Multiple fields in a 'when' clause use AND logic (all must match)
- Detectors without conditions are always active
- Only custom catalogers support detectors with conditions
CONDITION EVALUATION:
- Conditions are evaluated in array order (first match wins)
- Multiple fields in a 'when' clause use AND logic (all must match)
- Multiple conditions in the array use OR logic (first matching condition)
- If no conditions match, the default value is used
CAPABILITY FIELDS:
Standard capability field names and their value types:
license: (boolean)
Whether license information is available.
Examples:
default: true # always available
default: false # never available
default: false # requires configuration
conditions:
- when: {SearchRemoteLicenses: true}
value: true
dependency.depth: (array of strings)
Which dependency depths can be discovered.
Values: "direct" (immediate deps), "indirect" (transitive deps)
Examples:
default: [direct] # only immediate dependencies
default: [direct, indirect] # full transitive closure
default: [] # no dependency information
dependency.edges: (string)
Relationships between nodes and completeness of the dependency graph.
Values:
- "" # dependencies found but no edges between them
- "flat" # single level of dependencies with edges to root package only
- "reduced" # transitive reduction (redundant edges removed)
- "complete" # all relationships with accurate direct and indirect edges
Examples:
default: complete
default: ""
dependency.kinds: (array of strings)
Types of dependencies that can be discovered.
Values: "runtime", "dev", "build", "test", "optional"
Examples:
default: [runtime] # production dependencies only
default: [runtime, dev] # production and development
default: [runtime, dev, build] # all dependency types
default: [runtime] # with conditional dev deps
conditions:
- when: {IncludeDevDeps: true}
value: [runtime, dev]
package_manager.files.listing: (boolean)
Whether file listings are available (which files belong to the package).
Examples:
default: true
default: false
conditions:
- when: {CaptureOwnedFiles: true}
value: true
package_manager.files.digests: (boolean)
Whether file digests/checksums are included in listings.
Examples:
default: true
default: false
package_manager.package_integrity_hash: (boolean)
Whether a hash for verifying package integrity is available.
Examples:
default: true
default: false
EXAMPLES:
# Simple cataloger with no configuration
capabilities:
- name: license
default: true
comment: "license field always present in package.json"
- name: dependency.depth
default: [direct]
- name: dependency.edges
default: ""
- name: dependency.kinds
default: [runtime]
comment: "devDependencies not parsed by this cataloger"
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: false
# Cataloger with configuration-dependent capabilities
capabilities:
- name: license
default: false
conditions:
- when: {SearchLocalModCacheLicenses: true}
value: true
comment: "searches for licenses in GOPATH mod cache"
- when: {SearchRemoteLicenses: true}
value: true
comment: "fetches licenses from proxy.golang.org"
comment: "license scanning requires configuration"
- name: dependency.depth
default: [direct, indirect]
- name: dependency.edges
default: flat
- name: dependency.kinds
default: [runtime, dev]
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: true
evidence:
- "GolangBinaryBuildinfoEntry.H1Digest"`
// loadCapabilities reads the capabilities document from the YAML file at path.
//
// It returns the document decoded into a capabilities.Document together with the
// yaml.Node tree parsed from the same bytes; the node tree is what carries comments.
//
// Special cases:
//   - the file does not exist: an empty document and a nil node are returned, no error.
//   - the file contains no YAML document (empty or comment-only): the parser leaves the
//     node at its zero value, so a nil node is returned instead of a pointer to an empty
//     node. Callers that treat a nil node as "no existing tree" then build a fresh one
//     rather than trying to merge into a node that has no mapping.
//
// Any other read or parse failure is returned wrapped with context.
func loadCapabilities(path string) (*capabilities.Document, *yaml.Node, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		if os.IsNotExist(err) {
			// a missing file is not an error: start from an empty document
			return &capabilities.Document{}, nil, nil
		}
		return nil, nil, fmt.Errorf("failed to read capabilities file: %w", err)
	}

	// parse into node tree to preserve comments
	var rootNode yaml.Node
	if err := yaml.Unmarshal(data, &rootNode); err != nil {
		return nil, nil, fmt.Errorf("failed to parse capabilities YAML into node tree: %w", err)
	}

	// also parse into struct for easy manipulation
	var doc capabilities.Document
	if err := yaml.Unmarshal(data, &doc); err != nil {
		return nil, nil, fmt.Errorf("failed to parse capabilities YAML into struct: %w", err)
	}

	// Kind is only left at zero when the input held no document at all
	if rootNode.Kind == 0 {
		return &doc, nil, nil
	}
	return &doc, &rootNode, nil
}
// writeYAMLToFile encodes rootNode as YAML (2-space indent) into the file at path,
// creating or truncating the file.
//
// It returns an error if the file cannot be created, if encoding fails, if the encoder
// cannot be closed, or if closing the file itself fails. The file is always closed
// before returning.
func writeYAMLToFile(path string, rootNode *yaml.Node) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() {
		// a failed Close on a written file can mean lost data, so surface it
		// unless an earlier error is already being reported
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close file: %w", closeErr)
		}
	}()

	encoder := yaml.NewEncoder(f)
	encoder.SetIndent(2)
	if err := encoder.Encode(rootNode); err != nil {
		return fmt.Errorf("failed to write YAML: %w", err)
	}
	if err := encoder.Close(); err != nil {
		return fmt.Errorf("failed to close encoder: %w", err)
	}
	return nil
}
// addFieldComments annotates the top-level keys of the capabilities YAML tree.
//
// The "configs" and "application" keys receive an AUTO-GENERATED line comment when they
// have none. For "catalogers", any head comment on the key and value nodes is cleared
// (the only header belongs on the root node) and the entries are annotated through
// addCatalogerComments.
func addFieldComments(rootNode *yaml.Node) {
	// line comments for sections that are fully regenerated
	generatedSections := map[string]string{
		"configs":     "AUTO-GENERATED - config structs and their fields",
		"application": "AUTO-GENERATED - application-level config keys",
	}

	// unwrap a document node to reach the top-level mapping; otherwise use the node as given
	top := rootNode
	if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
		top = rootNode.Content[0]
	}
	if top == nil {
		return
	}

	// mapping content alternates key, value, key, value, ...
	for idx := 0; idx < len(top.Content); idx += 2 {
		key, value := top.Content[idx], top.Content[idx+1]

		if comment, generated := generatedSections[key.Value]; generated {
			if key.LineComment == "" {
				key.LineComment = comment
			}
			continue
		}

		if key.Value == "catalogers" {
			// prevent duplicate headers before annotating the individual catalogers
			key.HeadComment = ""
			value.HeadComment = ""
			addCatalogerComments(value)
		}
	}
}
// saveCapabilities writes the capabilities document to the YAML file at path, with the
// standard header and field comments applied.
//
// When existingNode is nil a new node tree is encoded from doc. Otherwise the root node
// is copied (its child nodes are shared with existingNode) and updated from doc through
// updateNodeTree so that existing comments survive. In both cases the root head comment
// is set to capabilitiesHeaderComment before field comments are added and the tree is
// written out.
func saveCapabilities(path string, doc *capabilities.Document, existingNode *yaml.Node) error {
	var rootNode yaml.Node

	switch {
	case existingNode == nil:
		// no previous tree: build one straight from the document
		if err := rootNode.Encode(doc); err != nil {
			return fmt.Errorf("failed to encode document: %w", err)
		}
		rootNode.HeadComment = capabilitiesHeaderComment
	default:
		// reuse the previous tree; the header is refreshed before merging
		rootNode = *existingNode
		rootNode.HeadComment = capabilitiesHeaderComment
		if err := updateNodeTree(&rootNode, doc); err != nil {
			return fmt.Errorf("failed to update node tree: %w", err)
		}
	}

	addFieldComments(&rootNode)
	return writeYAMLToFile(path, &rootNode)
}
// updateNodeTree refreshes an existing YAML node tree with the data in doc while keeping
// the comments already present in the tree.
//
// doc is first encoded into a temporary node tree. The "configs" and "application"
// sections are then replaced (or added) wholesale, and the "catalogers" section is
// merged so that comments on matching entries are carried over.
//
// If the existing tree has no top-level mapping to merge into (for example the node is
// empty), the tree is replaced by the freshly encoded one; the comments set on rootNode
// itself are kept so a header assigned by the caller is not lost.
//
// An error is returned only when doc cannot be encoded.
func updateNodeTree(rootNode *yaml.Node, doc *capabilities.Document) error {
	// encode the document into a new temporary node tree
	var newNode yaml.Node
	if err := newNode.Encode(doc); err != nil {
		return fmt.Errorf("failed to encode document: %w", err)
	}

	// locate the top-level mapping in each tree (unwrapping a document node if present)
	existingMapping := rootNode
	if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
		existingMapping = rootNode.Content[0]
	}
	newMapping := &newNode
	if newNode.Kind == yaml.DocumentNode && len(newNode.Content) > 0 {
		newMapping = newNode.Content[0]
	}

	// without an existing mapping there is nothing to merge into: the section helpers
	// would all return early and the document data would be silently dropped
	nothingToMergeInto := existingMapping == nil || newMapping == nil ||
		(existingMapping.Kind != yaml.MappingNode && newMapping.Kind == yaml.MappingNode)
	if nothingToMergeInto {
		head, line, foot := rootNode.HeadComment, rootNode.LineComment, rootNode.FootComment
		*rootNode = newNode
		rootNode.HeadComment, rootNode.LineComment, rootNode.FootComment = head, line, foot
		return nil
	}

	// update or add configs section (AUTO-GENERATED, no comment preservation needed)
	updateOrAddSection(existingMapping, newMapping, "configs")

	// update or add application section (AUTO-GENERATED, no comment preservation needed)
	updateOrAddSection(existingMapping, newMapping, "application")

	// update catalogers section (preserve comments)
	updateCatalogersSection(existingMapping, newMapping)

	return nil
}
// updateCatalogersSection replaces the "catalogers" entries in existingMapping with the
// ones from newMapping, carrying comments over from existing entries that have the same
// "name" field.
//
// Behavior by case:
//   - newMapping has no "catalogers" section: nothing is changed.
//   - existingMapping is a mapping without a "catalogers" key: the key and the new
//     section are appended, mirroring what updateOrAddSection does for other sections.
//   - both have the section: comments are preserved per cataloger, then the existing
//     value node takes over the new content. If the existing value is of a different
//     kind than the new one (e.g. an empty "catalogers:" parsed as a null scalar), its
//     kind, tag, value and style are aligned with the new node so the content is
//     actually emitted; comments on the existing value node are left in place.
func updateCatalogersSection(existingMapping, newMapping *yaml.Node) {
	newCatalogersNode := findSectionNode(newMapping, "catalogers")
	if newCatalogersNode == nil {
		return
	}

	existingCatalogersNode := findSectionNode(existingMapping, "catalogers")
	if existingCatalogersNode == nil {
		// no previous section to merge comments from: add the new one as-is
		if existingMapping.Kind == yaml.MappingNode {
			keyNode := &yaml.Node{
				Kind:  yaml.ScalarNode,
				Value: "catalogers",
			}
			existingMapping.Content = append(existingMapping.Content, keyNode, newCatalogersNode)
		}
		return
	}

	// create a map of existing cataloger nodes by name for quick lookup
	existingByName := make(map[string]*yaml.Node)
	if existingCatalogersNode.Kind == yaml.SequenceNode {
		for _, catalogerNode := range existingCatalogersNode.Content {
			if catalogerNode.Kind != yaml.MappingNode {
				continue
			}
			if name := findFieldValue(catalogerNode, "name"); name != "" {
				existingByName[name] = catalogerNode
			}
		}
	}

	// update each cataloger in the new tree with preserved comments
	if newCatalogersNode.Kind == yaml.SequenceNode {
		for _, newCatalogerNode := range newCatalogersNode.Content {
			if newCatalogerNode.Kind != yaml.MappingNode {
				continue
			}
			existingNode := existingByName[findFieldValue(newCatalogerNode, "name")]
			if existingNode == nil {
				continue
			}
			// preserve comments from existing cataloger entry
			newCatalogerNode.HeadComment = existingNode.HeadComment
			newCatalogerNode.LineComment = existingNode.LineComment
			newCatalogerNode.FootComment = existingNode.FootComment
			// preserve field-level and nested comments
			preserveFieldComments(existingNode, newCatalogerNode)
		}
	}

	// a node that keeps a scalar kind/tag would be encoded as that scalar and the
	// assigned content ignored, so bring the node's shape in line with the new section
	if existingCatalogersNode.Kind != newCatalogersNode.Kind {
		existingCatalogersNode.Kind = newCatalogersNode.Kind
		existingCatalogersNode.Tag = newCatalogersNode.Tag
		existingCatalogersNode.Value = newCatalogersNode.Value
		existingCatalogersNode.Style = newCatalogersNode.Style
	}

	// replace the catalogers content
	existingCatalogersNode.Content = newCatalogersNode.Content
}
// updateOrAddSection copies the section named sectionName from newMapping into
// existingMapping.
//
// Nothing happens unless both nodes are mappings and newMapping contains the section.
// If existingMapping already has the key, only its value node is swapped (the key node,
// and any comment on it, stays). Otherwise a new key/value pair is inserted at the front
// of the mapping.
func updateOrAddSection(existingMapping, newMapping *yaml.Node, sectionName string) {
	bothMappings := existingMapping.Kind == yaml.MappingNode && newMapping.Kind == yaml.MappingNode
	if !bothMappings {
		return
	}

	replacement := findSectionNode(newMapping, sectionName)
	if replacement == nil {
		return
	}

	// mapping content alternates key, value; swap the value of the first matching key
	for keyIdx := 0; keyIdx < len(existingMapping.Content); keyIdx += 2 {
		if existingMapping.Content[keyIdx].Value != sectionName {
			continue
		}
		existingMapping.Content[keyIdx+1] = replacement
		return
	}

	// section not present yet: put it at the beginning (before catalogers)
	front := []*yaml.Node{
		{Kind: yaml.ScalarNode, Value: sectionName},
		replacement,
	}
	existingMapping.Content = append(front, existingMapping.Content...)
}
// findSectionNode returns the value node stored under the key sectionName in
// mappingNode, or nil when mappingNode is not a mapping or has no such key with a value.
func findSectionNode(mappingNode *yaml.Node, sectionName string) *yaml.Node {
	if mappingNode.Kind != yaml.MappingNode {
		return nil
	}

	// entries alternate key, value
	entries := mappingNode.Content
	for keyIdx := 0; keyIdx < len(entries); keyIdx += 2 {
		if entries[keyIdx].Value != sectionName {
			continue
		}
		if valueIdx := keyIdx + 1; valueIdx < len(entries) {
			return entries[valueIdx]
		}
	}
	return nil
}
// findFieldValue returns the Value of the node stored under the key fieldName in
// mappingNode. It returns "" when mappingNode is not a mapping or the key is absent.
func findFieldValue(mappingNode *yaml.Node, fieldName string) string {
	if mappingNode.Kind != yaml.MappingNode {
		return ""
	}

	// entries alternate key, value
	entries := mappingNode.Content
	for keyIdx := 0; keyIdx < len(entries); keyIdx += 2 {
		if entries[keyIdx].Value != fieldName {
			continue
		}
		if valueIdx := keyIdx + 1; valueIdx < len(entries) {
			return entries[valueIdx].Value
		}
	}
	return ""
}
// preserveMappingNodeComments copies comments from the entries of existingNode onto the
// entries of newNode that share the same key.
//
// For every matching key, the head/line/foot comments of both the key node and the value
// node are overwritten with those from the existing entry, and nested comments under the
// value are carried over through preserveFieldComments. Keys that only exist in newNode
// are left untouched.
func preserveMappingNodeComments(existingNode, newNode *yaml.Node) {
	type entry struct {
		key   *yaml.Node
		value *yaml.Node
	}

	// index the existing entries by key; content alternates key, value
	previous := make(map[string]entry)
	oldContent := existingNode.Content
	for idx := 0; idx < len(oldContent); idx += 2 {
		k, v := oldContent[idx], oldContent[idx+1]
		previous[k.Value] = entry{key: k, value: v}
	}

	newContent := newNode.Content
	for idx := 0; idx < len(newContent); idx += 2 {
		k, v := newContent[idx], newContent[idx+1]

		prior, found := previous[k.Value]
		if !found {
			continue
		}

		// key-side comments (this is where line comments like "# AUTO-GENERATED" live)
		if prior.key != nil {
			k.HeadComment = prior.key.HeadComment
			k.LineComment = prior.key.LineComment
			k.FootComment = prior.key.FootComment
		}

		// value-side comments, then everything nested below the value
		if prior.value != nil {
			v.HeadComment = prior.value.HeadComment
			v.LineComment = prior.value.LineComment
			v.FootComment = prior.value.FootComment
			preserveFieldComments(prior.value, v)
		}
	}
}
// preserveSequenceNodeComments copies comments from the items of existingNode onto the
// items of newNode. Only mapping items in newNode are considered.
//
// Matching rules:
//   - a new item that has a "parser_function" field is matched only against the existing
//     item with the same "parser_function" value. If there is none, the item is treated
//     as new and gets no comments; it must not pick up the comments of whatever item
//     happened to sit at the same position before, since those describe another parser.
//   - a new item without a "parser_function" field is matched to the existing item at
//     the same index, if there is one.
//
// For a matched pair the head/line/foot comments are copied and nested comments are
// carried over through preserveFieldComments.
func preserveSequenceNodeComments(existingNode, newNode *yaml.Node) {
	// index existing mapping items by their parser_function value
	existingByParser := make(map[string]*yaml.Node)
	for _, existingItem := range existingNode.Content {
		if existingItem.Kind != yaml.MappingNode {
			continue
		}
		if parser := findFieldValue(existingItem, "parser_function"); parser != "" {
			existingByParser[parser] = existingItem
		}
	}

	for i, newItem := range newNode.Content {
		if newItem.Kind != yaml.MappingNode {
			continue
		}

		// pick the existing item whose comments belong to this new item, if any
		var existingItem *yaml.Node
		if parser := findFieldValue(newItem, "parser_function"); parser != "" {
			existingItem = existingByParser[parser]
		} else if i < len(existingNode.Content) {
			// positional matching is only meaningful for items without an identity field
			existingItem = existingNode.Content[i]
		}
		if existingItem == nil {
			continue
		}

		newItem.HeadComment = existingItem.HeadComment
		newItem.LineComment = existingItem.LineComment
		newItem.FootComment = existingItem.FootComment
		preserveFieldComments(existingItem, newItem)
	}
}
// preserveFieldComments carries nested comments from existingNode over to newNode.
// It does nothing when the two nodes are of different kinds; mappings and sequences are
// delegated to their dedicated helpers, and every other kind is left as-is.
func preserveFieldComments(existingNode, newNode *yaml.Node) {
	if existingNode.Kind != newNode.Kind {
		return
	}

	if newNode.Kind == yaml.MappingNode {
		preserveMappingNodeComments(existingNode, newNode)
		return
	}
	if newNode.Kind == yaml.SequenceNode {
		preserveSequenceNodeComments(existingNode, newNode)
	}
}
// addCatalogerFieldComment sets the line comment on one key of a cataloger entry,
// marking the field as AUTO-GENERATED or MANUAL depending on the key.
//
// An existing line comment is never overwritten, with one exception: the "detectors"
// key of the cataloger named "binary-classifier-cataloger" is always marked
// AUTO-GENERATED. For the "parsers" key the parser entries in valueNode are annotated
// as well. Keys not listed here are left untouched.
func addCatalogerFieldComment(keyNode, valueNode *yaml.Node, catalogerName string) {
	// setIfEmpty applies comment only when the key has no line comment yet
	setIfEmpty := func(comment string) {
		if keyNode.LineComment == "" {
			keyNode.LineComment = comment
		}
	}

	switch keyNode.Value {
	case "name", "type", "source", "config", "selectors",
		"metadata_types", "package_types", "json_schema_types":
		// all of these are AUTO-GENERATED
		setIfEmpty(autoGeneratedComment)

	case "ecosystem":
		// ecosystem is MANUAL
		setIfEmpty("MANUAL")

	case "capabilities":
		// capabilities are MANUAL
		setIfEmpty("MANUAL - edit capabilities here")

	case "parsers":
		// parsers is AUTO-GENERATED structure
		setIfEmpty("AUTO-GENERATED structure")
		addParserComments(valueNode)

	case "detectors":
		// detectors are AUTO-GENERATED for binary-classifier-cataloger, MANUAL for others
		if catalogerName == "binary-classifier-cataloger" {
			keyNode.LineComment = autoGeneratedComment
			return
		}
		setIfEmpty("MANUAL - edit detectors here")
	}
}
// addCatalogerComments annotates every field of every cataloger entry in catalogersNode.
// It does nothing unless catalogersNode is a sequence, and skips items that are not
// mappings.
func addCatalogerComments(catalogersNode *yaml.Node) {
	if catalogersNode.Kind != yaml.SequenceNode {
		return
	}

	for _, entry := range catalogersNode.Content {
		if entry.Kind != yaml.MappingNode {
			continue
		}

		// the cataloger name drives special handling of some fields
		name := findFieldValue(entry, "name")

		// fields alternate key, value
		fields := entry.Content
		for idx := 0; idx < len(fields); idx += 2 {
			addCatalogerFieldComment(fields[idx], fields[idx+1], name)
		}
	}
}
// addParserComments annotates the fields of every parser entry in parsersNode.
//
// "parser_function", "metadata_types", "package_types", "json_schema_types" and
// "detector" keys are marked AUTO-GENERATED, and "capabilities" is marked MANUAL; a line
// comment that is already present is kept. The fields of a "detector" value are
// annotated too. It does nothing unless parsersNode is a sequence, and skips items that
// are not mappings.
func addParserComments(parsersNode *yaml.Node) {
	if parsersNode.Kind != yaml.SequenceNode {
		return
	}

	// setIfEmpty applies comment only when the key has no line comment yet
	setIfEmpty := func(key *yaml.Node, comment string) {
		if key.LineComment == "" {
			key.LineComment = comment
		}
	}

	for _, entry := range parsersNode.Content {
		if entry.Kind != yaml.MappingNode {
			continue
		}

		// fields alternate key, value
		fields := entry.Content
		for idx := 0; idx < len(fields); idx += 2 {
			key, value := fields[idx], fields[idx+1]

			switch key.Value {
			case "parser_function", "metadata_types", "package_types", "json_schema_types":
				setIfEmpty(key, autoGeneratedComment)
			case "detector":
				// detector is AUTO-GENERATED, including its nested fields
				setIfEmpty(key, autoGeneratedComment)
				addDetectorComments(value)
			case "capabilities":
				// capabilities are MANUAL
				setIfEmpty(key, "MANUAL - preserved across regeneration")
			}
		}
	}
}
// addDetectorComments marks the "method" and "criteria" keys of a detector mapping as
// AUTO-GENERATED when they carry no line comment yet. It does nothing unless
// detectorNode is a mapping.
func addDetectorComments(detectorNode *yaml.Node) {
	if detectorNode.Kind != yaml.MappingNode {
		return
	}

	// only the key nodes (even positions) are inspected
	for idx := 0; idx < len(detectorNode.Content); idx += 2 {
		key := detectorNode.Content[idx]
		generated := key.Value == "method" || key.Value == "criteria"
		if generated && key.LineComment == "" {
			key.LineComment = autoGeneratedComment
		}
	}
}