// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration.

package main

import (
	"fmt"
	"os"

	"gopkg.in/yaml.v3"

	"github.com/anchore/syft/internal/capabilities"
)

// autoGeneratedComment is the YAML line comment used to mark keys that are
// produced by the generator rather than edited by hand.
const autoGeneratedComment = "AUTO-GENERATED"

// capabilitiesHeaderComment is the head comment that saveCapabilities sets on
// the root node of the capabilities YAML file. It explains to people editing
// the file which parts are generated, which are manual, and how capability and
// detector conditions are written.
const capabilitiesHeaderComment = `This file is partially auto-generated. Run 'go generate ./internal/capabilities' to regenerate. Fields marked AUTO-GENERATED will be updated during regeneration. All 'capabilities' sections are MANUAL - edit these to describe cataloger behavior. CAPABILITY SECTIONS: There are two types of capability sections depending on cataloger type: 1. Generic catalogers (type=generic): Have capabilities at the PARSER level - Each parser function has its own capabilities section - Allows different parsers within the same cataloger to have different capabilities 2. Custom catalogers (type=custom): Have capabilities at the CATALOGER level - Single capabilities section for the entire cataloger CAPABILITIES FORMAT: Capabilities use a field-based format with defaults and optional conditional overrides: capabilities: - field: # dot-notation path (e.g., "license", "dependency.depth") default: # value when no conditions match conditions: # optional - conditional overrides evaluated in order - when: {ConfigField: val} # when these config fields match (AND logic) value: # use this value instead comment: "explanation" # optional - why this condition exists evidence: # optional - source code references - "StructName.FieldName" comment: "explanation" # optional - general field explanation DETECTOR CONDITIONS: Detectors (used by custom catalogers) can have optional conditions that control when they are active. This allows a single cataloger to have different detection behavior based on configuration. 
Structure: detectors: - method: glob # AUTO-GENERATED - detection method criteria: ["**/*.jar"] # AUTO-GENERATED - patterns to match comment: "always active" # MANUAL - optional explanation - method: glob criteria: ["**/*.zip"] conditions: # MANUAL - when this detector is active - when: {IncludeZipFiles: true} # config fields that must match comment: "optional explanation" comment: "ZIP detection requires config" Notes: - Conditions reference fields from the cataloger's config struct - Multiple conditions in the array use OR logic (any condition can activate) - Multiple fields in a 'when' clause use AND logic (all must match) - Detectors without conditions are always active - Only custom catalogers support detectors with conditions CONDITION EVALUATION: - Conditions are evaluated in array order (first match wins) - Multiple fields in a 'when' clause use AND logic (all must match) - Multiple conditions in the array use OR logic (first matching condition) - If no conditions match, the default value is used CAPABILITY FIELDS: Standard capability field names and their value types: license: (boolean) Whether license information is available. Examples: default: true # always available default: false # never available default: false # requires configuration conditions: - when: {SearchRemoteLicenses: true} value: true dependency.depth: (array of strings) Which dependency depths can be discovered. Values: "direct" (immediate deps), "indirect" (transitive deps) Examples: default: [direct] # only immediate dependencies default: [direct, indirect] # full transitive closure default: [] # no dependency information dependency.edges: (string) Relationships between nodes and completeness of the dependency graph. 
Values: - "" # dependencies found but no edges between them - "flat" # single level of dependencies with edges to root package only - "reduced" # transitive reduction (redundant edges removed) - "complete" # all relationships with accurate direct and indirect edges Examples: default: complete default: "" dependency.kinds: (array of strings) Types of dependencies that can be discovered. Values: "runtime", "dev", "build", "test", "optional" Examples: default: [runtime] # production dependencies only default: [runtime, dev] # production and development default: [runtime, dev, build] # all dependency types default: [runtime] # with conditional dev deps conditions: - when: {IncludeDevDeps: true} value: [runtime, dev] package_manager.files.listing: (boolean) Whether file listings are available (which files belong to the package). Examples: default: true default: false conditions: - when: {CaptureOwnedFiles: true} value: true package_manager.files.digests: (boolean) Whether file digests/checksums are included in listings. Examples: default: true default: false package_manager.package_integrity_hash: (boolean) Whether a hash for verifying package integrity is available. 
Examples: default: true default: false EXAMPLES: # Simple cataloger with no configuration capabilities: - name: license default: true comment: "license field always present in package.json" - name: dependency.depth default: [direct] - name: dependency.edges default: "" - name: dependency.kinds default: [runtime] comment: "devDependencies not parsed by this cataloger" - name: package_manager.files.listing default: false - name: package_manager.files.digests default: false - name: package_manager.package_integrity_hash default: false # Cataloger with configuration-dependent capabilities capabilities: - name: license default: false conditions: - when: {SearchLocalModCacheLicenses: true} value: true comment: "searches for licenses in GOPATH mod cache" - when: {SearchRemoteLicenses: true} value: true comment: "fetches licenses from proxy.golang.org" comment: "license scanning requires configuration" - name: dependency.depth default: [direct, indirect] - name: dependency.edges default: flat - name: dependency.kinds default: [runtime, dev] - name: package_manager.files.listing default: false - name: package_manager.files.digests default: false - name: package_manager.package_integrity_hash default: true evidence: - "GolangBinaryBuildinfoEntry.H1Digest"`

// loadCapabilities loads the capabilities document from a YAML file.
// Returns both the parsed document and the original YAML node tree to preserve comments.
// Exported for use by the generator in generate/main.go func loadCapabilities(path string) (*capabilities.Document, *yaml.Node, error) { data, err := os.ReadFile(path) if err != nil { if os.IsNotExist(err) { // return empty document if file doesn't exist return &capabilities.Document{}, nil, nil } return nil, nil, fmt.Errorf("failed to read capabilities file: %w", err) } // parse into node tree to preserve comments var rootNode yaml.Node if err := yaml.Unmarshal(data, &rootNode); err != nil { return nil, nil, fmt.Errorf("failed to parse capabilities YAML into node tree: %w", err) } // also parse into struct for easy manipulation var doc capabilities.Document if err := yaml.Unmarshal(data, &doc); err != nil { return nil, nil, fmt.Errorf("failed to parse capabilities YAML into struct: %w", err) } return &doc, &rootNode, nil } // writeYAMLToFile writes a YAML node to a file with proper encoding func writeYAMLToFile(path string, rootNode *yaml.Node) error { f, err := os.Create(path) if err != nil { return fmt.Errorf("failed to create file: %w", err) } defer f.Close() encoder := yaml.NewEncoder(f) encoder.SetIndent(2) if err := encoder.Encode(rootNode); err != nil { return fmt.Errorf("failed to write YAML: %w", err) } if err := encoder.Close(); err != nil { return fmt.Errorf("failed to close encoder: %w", err) } return nil } // addFieldComments adds comments to top-level fields in the YAML node func addFieldComments(rootNode *yaml.Node) { // navigate to the mapping node (handle both DocumentNode and MappingNode) var mappingNode *yaml.Node if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { mappingNode = rootNode.Content[0] } else { mappingNode = rootNode } if mappingNode != nil && len(mappingNode.Content) > 0 { // iterate through top-level keys for i := 0; i < len(mappingNode.Content); i += 2 { keyNode := mappingNode.Content[i] valueNode := mappingNode.Content[i+1] switch keyNode.Value { case "configs": // configs section is AUTO-GENERATED if 
keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED - config structs and their fields" } case "application": // application section is AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED - application-level config keys" } case "catalogers": // clear any HeadComment on catalogers key and value nodes to prevent duplicate headers // (the only header should be on the root DocumentNode) keyNode.HeadComment = "" valueNode.HeadComment = "" addCatalogerComments(valueNode) } } } } // SaveCapabilities saves the capabilities document to a YAML file with comments. // If existingNode is provided, it updates the existing node tree in-place to preserve comments. // If existingNode is nil, it creates a new node tree. func saveCapabilities(path string, doc *capabilities.Document, existingNode *yaml.Node) error { var rootNode yaml.Node if existingNode != nil { // update existing node tree in-place to preserve all comments rootNode = *existingNode rootNode.HeadComment = capabilitiesHeaderComment // update header before processing if err := updateNodeTree(&rootNode, doc); err != nil { return fmt.Errorf("failed to update node tree: %w", err) } } else { // create a new yaml.Node for new files if err := rootNode.Encode(doc); err != nil { return fmt.Errorf("failed to encode document: %w", err) } rootNode.HeadComment = capabilitiesHeaderComment } // add/update comments to fields addFieldComments(&rootNode) // write to file return writeYAMLToFile(path, &rootNode) } // updateNodeTree updates an existing YAML node tree with new document data // while preserving all existing comments (HeadComment, LineComment, FootComment). 
func updateNodeTree(rootNode *yaml.Node, doc *capabilities.Document) error { // encode the document into a new temporary node tree var newNode yaml.Node if err := newNode.Encode(doc); err != nil { return fmt.Errorf("failed to encode document: %w", err) } // get the mapping node from root var existingMapping *yaml.Node var newMapping *yaml.Node if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { existingMapping = rootNode.Content[0] } else { existingMapping = rootNode } if newNode.Kind == yaml.DocumentNode && len(newNode.Content) > 0 { newMapping = newNode.Content[0] } else { newMapping = &newNode } if existingMapping == nil || newMapping == nil { *rootNode = newNode return nil } // update or add configs section (AUTO-GENERATED, no comment preservation needed) updateOrAddSection(existingMapping, newMapping, "configs") // update or add application section (AUTO-GENERATED, no comment preservation needed) updateOrAddSection(existingMapping, newMapping, "application") // update catalogers section (preserve comments) updateCatalogersSection(existingMapping, newMapping) return nil } // updateCatalogersSection updates the catalogers section while preserving comments func updateCatalogersSection(existingMapping, newMapping *yaml.Node) { existingCatalogersNode := findSectionNode(existingMapping, "catalogers") newCatalogersNode := findSectionNode(newMapping, "catalogers") if existingCatalogersNode == nil || newCatalogersNode == nil { return } // create a map of existing cataloger nodes by name for quick lookup existingByName := make(map[string]*yaml.Node) if existingCatalogersNode.Kind == yaml.SequenceNode { for _, catalogerNode := range existingCatalogersNode.Content { if catalogerNode.Kind == yaml.MappingNode { name := findFieldValue(catalogerNode, "name") if name != "" { existingByName[name] = catalogerNode } } } } // update each cataloger in the new tree with preserved comments if newCatalogersNode.Kind == yaml.SequenceNode { for _, newCatalogerNode := 
range newCatalogersNode.Content { if newCatalogerNode.Kind != yaml.MappingNode { continue } name := findFieldValue(newCatalogerNode, "name") if existingNode := existingByName[name]; existingNode != nil { // preserve comments from existing cataloger entry newCatalogerNode.HeadComment = existingNode.HeadComment newCatalogerNode.LineComment = existingNode.LineComment newCatalogerNode.FootComment = existingNode.FootComment // preserve field-level and nested comments preserveFieldComments(existingNode, newCatalogerNode) } } } // replace the catalogers content existingCatalogersNode.Content = newCatalogersNode.Content } // updateOrAddSection updates or adds a section in the existing mapping from the new mapping func updateOrAddSection(existingMapping, newMapping *yaml.Node, sectionName string) { if existingMapping.Kind != yaml.MappingNode || newMapping.Kind != yaml.MappingNode { return } newSection := findSectionNode(newMapping, sectionName) if newSection == nil { return } // find if section exists in existing mapping existingSectionIdx := -1 for i := 0; i < len(existingMapping.Content); i += 2 { if existingMapping.Content[i].Value == sectionName { existingSectionIdx = i break } } if existingSectionIdx >= 0 { // replace existing section value existingMapping.Content[existingSectionIdx+1] = newSection } else { // add new section at the beginning (before catalogers) keyNode := &yaml.Node{ Kind: yaml.ScalarNode, Value: sectionName, } existingMapping.Content = append([]*yaml.Node{keyNode, newSection}, existingMapping.Content...) 
} } // findSectionNode finds a section node by name in a mapping node func findSectionNode(mappingNode *yaml.Node, sectionName string) *yaml.Node { if mappingNode.Kind != yaml.MappingNode { return nil } for i := 0; i < len(mappingNode.Content); i += 2 { if mappingNode.Content[i].Value == sectionName && i+1 < len(mappingNode.Content) { return mappingNode.Content[i+1] } } return nil } // findFieldValue finds the value of a field in a mapping node func findFieldValue(mappingNode *yaml.Node, fieldName string) string { if mappingNode.Kind != yaml.MappingNode { return "" } for i := 0; i < len(mappingNode.Content); i += 2 { if mappingNode.Content[i].Value == fieldName && i+1 < len(mappingNode.Content) { return mappingNode.Content[i+1].Value } } return "" } // preserveMappingNodeComments preserves comments for mapping nodes func preserveMappingNodeComments(existingNode, newNode *yaml.Node) { // create maps of existing fields by key for both keys and values existingKeys := make(map[string]*yaml.Node) existingValues := make(map[string]*yaml.Node) for i := 0; i < len(existingNode.Content); i += 2 { keyNode := existingNode.Content[i] valueNode := existingNode.Content[i+1] existingKeys[keyNode.Value] = keyNode existingValues[keyNode.Value] = valueNode } // preserve comments for matching fields in new node for i := 0; i < len(newNode.Content); i += 2 { keyNode := newNode.Content[i] valueNode := newNode.Content[i+1] // preserve comments on the key node (for line comments like "# AUTO-GENERATED") if existingKey := existingKeys[keyNode.Value]; existingKey != nil { keyNode.HeadComment = existingKey.HeadComment keyNode.LineComment = existingKey.LineComment keyNode.FootComment = existingKey.FootComment } // preserve comments on the value node if existingValue := existingValues[keyNode.Value]; existingValue != nil { valueNode.HeadComment = existingValue.HeadComment valueNode.LineComment = existingValue.LineComment valueNode.FootComment = existingValue.FootComment // recursively 
preserve nested comments preserveFieldComments(existingValue, valueNode) } } } // preserveSequenceNodeComments preserves comments for sequence nodes func preserveSequenceNodeComments(existingNode, newNode *yaml.Node) { // for sequences, preserve comments based on matching "parser_function" field (for parsers) // or by array index as a fallback existingByParser := make(map[string]*yaml.Node) for _, existingItem := range existingNode.Content { if existingItem.Kind == yaml.MappingNode { parser := findFieldValue(existingItem, "parser_function") if parser != "" { existingByParser[parser] = existingItem } } } // match parsers by parser_function if available for i, newItem := range newNode.Content { if newItem.Kind == yaml.MappingNode { parser := findFieldValue(newItem, "parser_function") if parser != "" && existingByParser[parser] != nil { existingItem := existingByParser[parser] newItem.HeadComment = existingItem.HeadComment newItem.LineComment = existingItem.LineComment newItem.FootComment = existingItem.FootComment preserveFieldComments(existingItem, newItem) } else if i < len(existingNode.Content) { // fallback to index-based matching existingItem := existingNode.Content[i] newItem.HeadComment = existingItem.HeadComment newItem.LineComment = existingItem.LineComment newItem.FootComment = existingItem.FootComment preserveFieldComments(existingItem, newItem) } } } } // preserveFieldComments recursively preserves comments from an existing node to a new node func preserveFieldComments(existingNode, newNode *yaml.Node) { if existingNode.Kind != newNode.Kind { return } switch newNode.Kind { case yaml.MappingNode: preserveMappingNodeComments(existingNode, newNode) case yaml.SequenceNode: preserveSequenceNodeComments(existingNode, newNode) } } // addCatalogerFieldComment adds appropriate comment to a single cataloger field func addCatalogerFieldComment(keyNode, valueNode *yaml.Node, catalogerName string) { switch keyNode.Value { case "ecosystem": // ecosystem is MANUAL if 
keyNode.LineComment == "" { keyNode.LineComment = "MANUAL" } case "name", "type": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "source": // add AUTO-GENERATED comment to source field if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "config": // add AUTO-GENERATED comment to config field if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "selectors": // selectors are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "parsers": // parsers is AUTO-GENERATED structure if keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED structure" } addParserComments(valueNode) case "detectors": // detectors are AUTO-GENERATED for binary-classifier-cataloger, MANUAL for others if catalogerName == "binary-classifier-cataloger" { keyNode.LineComment = autoGeneratedComment } else if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - edit detectors here" } case "metadata_types": // cataloger-level metadata_types (for custom catalogers) are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "package_types": // cataloger-level package_types (for custom catalogers) are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "json_schema_types": // json_schema_types are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "capabilities": // capabilities are MANUAL if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - edit capabilities here" } } } func addCatalogerComments(catalogersNode *yaml.Node) { // catalogersNode should be a sequence of cataloger entries if catalogersNode.Kind != yaml.SequenceNode { return } for _, catalogerNode := range catalogersNode.Content { if catalogerNode.Kind != yaml.MappingNode { continue } // get the cataloger name 
for special handling catalogerName := findFieldValue(catalogerNode, "name") // iterate through the fields of this cataloger entry for i := 0; i < len(catalogerNode.Content); i += 2 { keyNode := catalogerNode.Content[i] valueNode := catalogerNode.Content[i+1] addCatalogerFieldComment(keyNode, valueNode, catalogerName) } } } func addParserComments(parsersNode *yaml.Node) { // parsersNode should be a sequence of parser entries if parsersNode.Kind != yaml.SequenceNode { return } for _, parserNode := range parsersNode.Content { if parserNode.Kind != yaml.MappingNode { continue } // iterate through the fields of this parser entry for i := 0; i < len(parserNode.Content); i += 2 { keyNode := parserNode.Content[i] valueNode := parserNode.Content[i+1] switch keyNode.Value { case "parser_function", "metadata_types", "package_types", "json_schema_types": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "detector": // detector is AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } addDetectorComments(valueNode) case "capabilities": // capabilities are MANUAL if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - preserved across regeneration" } } } } } func addDetectorComments(detectorNode *yaml.Node) { // detectorNode should be a mapping node with method and criteria fields if detectorNode.Kind != yaml.MappingNode { return } // iterate through the fields of the detector for i := 0; i < len(detectorNode.Content); i += 2 { keyNode := detectorNode.Content[i] switch keyNode.Value { case "method", "criteria": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } } } }