// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration. package main import ( "fmt" "os" "path/filepath" "regexp" "strings" "gopkg.in/yaml.v3" "github.com/anchore/syft/internal/capabilities" ) const autoGeneratedComment = "AUTO-GENERATED" // loadCapabilities loads the capabilities document from a YAML file. // Returns both the parsed document and the original YAML node tree to preserve comments. // Exported for use by the generator in generate/main.go func loadCapabilities(capabilitiesDir string) (*capabilities.Document, map[string]*yaml.Node, error) { packagesDir := filepath.Join(capabilitiesDir, "packages") // load all packages/*.yaml files files, err := filepath.Glob(filepath.Join(packagesDir, "*.yaml")) if err != nil { return nil, nil, fmt.Errorf("failed to glob packages directory: %w", err) } mergedDoc := &capabilities.Document{ Configs: make(map[string]capabilities.CatalogerConfigEntry), Catalogers: []capabilities.CatalogerEntry{}, } nodeMap := make(map[string]*yaml.Node) // load each package file for _, file := range files { data, err := os.ReadFile(file) if err != nil { return nil, nil, fmt.Errorf("failed to read %s: %w", file, err) } // parse into node tree to preserve comments var rootNode yaml.Node if err := yaml.Unmarshal(data, &rootNode); err != nil { return nil, nil, fmt.Errorf("failed to parse %s into node tree: %w", file, err) } // parse into struct var doc struct { Configs map[string]capabilities.CatalogerConfigEntry `yaml:"configs"` Catalogers []capabilities.CatalogerEntry `yaml:"catalogers"` } if err := yaml.Unmarshal(data, &doc); err != nil { return nil, nil, fmt.Errorf("failed to parse %s into struct: %w", file, err) } // merge configs for k, v := range doc.Configs { mergedDoc.Configs[k] = v } // merge catalogers mergedDoc.Catalogers = append(mergedDoc.Catalogers, doc.Catalogers...) // store node tree by filename (basename without .yaml) ecosystem := strings.TrimSuffix(filepath.Base(file), ".yaml") nodeMap[ecosystem] = &rootNode } // load appconfig.yaml separately appconfigPath := filepath.Join(capabilitiesDir, "appconfig.yaml") if _, err := os.Stat(appconfigPath); err == nil { data, err := os.ReadFile(appconfigPath) if err != nil { return nil, nil, fmt.Errorf("failed to read appconfig.yaml: %w", err) } var appDoc struct { Application []capabilities.ApplicationConfigField `yaml:"application"` } if err := yaml.Unmarshal(data, &appDoc); err != nil { return nil, nil, fmt.Errorf("failed to parse appconfig.yaml: %w", err) } mergedDoc.ApplicationConfig = appDoc.Application // load node tree for appconfig var appNode yaml.Node if err := yaml.Unmarshal(data, &appNode); err != nil { return nil, nil, fmt.Errorf("failed to parse appconfig.yaml into node tree: %w", err) } nodeMap["appconfig"] = &appNode } return mergedDoc, nodeMap, nil } // writeYAMLToFile writes a YAML node to a file with proper encoding func writeYAMLToFile(path string, rootNode *yaml.Node) error { f, err := os.Create(path) if err != nil { return fmt.Errorf("failed to create file: %w", err) } defer f.Close() encoder := yaml.NewEncoder(f) encoder.SetIndent(2) if err := encoder.Encode(rootNode); err != nil { return fmt.Errorf("failed to write YAML: %w", err) } if err := encoder.Close(); err != nil { return fmt.Errorf("failed to close encoder: %w", err) } return nil } // addFieldComments adds comments to top-level fields in the YAML node func addFieldComments(rootNode *yaml.Node) { // navigate to the mapping node (handle both DocumentNode and MappingNode) var mappingNode *yaml.Node if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { mappingNode = rootNode.Content[0] } else { mappingNode = rootNode } if mappingNode != nil && len(mappingNode.Content) > 0 { // iterate through top-level keys for i := 0; i < len(mappingNode.Content); i += 2 { keyNode := mappingNode.Content[i] valueNode := mappingNode.Content[i+1] switch keyNode.Value { case "configs": // configs section is AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED - config structs and their fields" } case "application": // application section is AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED - application-level config keys" } case "catalogers": // clear any HeadComment on catalogers key and value nodes to prevent duplicate headers // (the only header should be on the root DocumentNode) keyNode.HeadComment = "" valueNode.HeadComment = "" addCatalogerComments(valueNode) } } } } // SaveCapabilities saves the capabilities document to distributed YAML files with comments. // Groups catalogers by ecosystem and writes each to packages/ECOSYSTEM.yaml. // Also saves appconfig.yaml separately. func saveCapabilities(capabilitiesDir string, doc *capabilities.Document, existingNodes map[string]*yaml.Node) error { packagesDir := filepath.Join(capabilitiesDir, "packages") catalogersByEcosystem, configsByEcosystem := groupCatalogersByEcosystem(doc) // write each ecosystem file for ecosystem, catalogers := range catalogersByEcosystem { if err := writeEcosystemFile(packagesDir, ecosystem, catalogers, configsByEcosystem[ecosystem], existingNodes); err != nil { return err } } // save appconfig.yaml if len(doc.ApplicationConfig) > 0 { if err := writeAppconfigFile(capabilitiesDir, doc.ApplicationConfig, existingNodes); err != nil { return err } } return nil } // groupCatalogersByEcosystem groups catalogers and their configs by ecosystem func groupCatalogersByEcosystem(doc *capabilities.Document) (map[string][]capabilities.CatalogerEntry, map[string]map[string]capabilities.CatalogerConfigEntry) { catalogersByEcosystem := make(map[string][]capabilities.CatalogerEntry) configsByEcosystem := make(map[string]map[string]capabilities.CatalogerConfigEntry) for _, cat := range doc.Catalogers { ecosystem := mapCatalogerToEcosystem(cat) catalogersByEcosystem[ecosystem] = append(catalogersByEcosystem[ecosystem], cat) // also group configs for this ecosystem if cat.Config != "" { if configEntry, exists := doc.Configs[cat.Config]; exists { if configsByEcosystem[ecosystem] == nil { configsByEcosystem[ecosystem] = make(map[string]capabilities.CatalogerConfigEntry) } configsByEcosystem[ecosystem][cat.Config] = configEntry } } } return catalogersByEcosystem, configsByEcosystem } // writeEcosystemFile writes a single ecosystem's catalogers and configs to a YAML file func writeEcosystemFile(packagesDir, ecosystem string, catalogers []capabilities.CatalogerEntry, configs map[string]capabilities.CatalogerConfigEntry, existingNodes map[string]*yaml.Node) error { ecosystemDoc := struct { Configs map[string]capabilities.CatalogerConfigEntry `yaml:"configs,omitempty"` Catalogers []capabilities.CatalogerEntry `yaml:"catalogers"` }{ Configs: configs, Catalogers: catalogers, } var rootNode yaml.Node existingNode, hasExisting := existingNodes[ecosystem] if hasExisting && existingNode != nil { // update existing node tree rootNode = *existingNode rootNode.HeadComment = "# Cataloger capabilities. See ../README.md for documentation.\n" if err := updateNodeTreeEcosystem(&rootNode, &ecosystemDoc); err != nil { return fmt.Errorf("failed to update node tree for %s: %w", ecosystem, err) } } else { // create new node tree if err := rootNode.Encode(&ecosystemDoc); err != nil { return fmt.Errorf("failed to encode %s: %w", ecosystem, err) } rootNode.HeadComment = "# Cataloger capabilities. See ../README.md for documentation.\n" } // add field comments addFieldComments(&rootNode) // write file ecosystemPath := filepath.Join(packagesDir, ecosystem+".yaml") if err := writeYAMLToFile(ecosystemPath, &rootNode); err != nil { return fmt.Errorf("failed to write %s: %w", ecosystem, err) } return nil } // writeAppconfigFile writes the application config to appconfig.yaml func writeAppconfigFile(capabilitiesDir string, appConfig []capabilities.ApplicationConfigField, existingNodes map[string]*yaml.Node) error { appconfigDoc := struct { Application []capabilities.ApplicationConfigField `yaml:"application"` }{ Application: appConfig, } var appNode yaml.Node existingAppNode, hasExisting := existingNodes["appconfig"] if hasExisting && existingAppNode != nil { appNode = *existingAppNode if err := updateNodeTreeAppConfig(&appNode, &appconfigDoc); err != nil { return fmt.Errorf("failed to update appconfig node tree: %w", err) } } else { if err := appNode.Encode(&appconfigDoc); err != nil { return fmt.Errorf("failed to encode appconfig: %w", err) } appNode.HeadComment = "# Application-level configuration. See README.md for documentation.\n# This file is partially auto-generated. Run 'go generate ./internal/capabilities' to regenerate.\n" } addFieldComments(&appNode) appconfigPath := filepath.Join(capabilitiesDir, "appconfig.yaml") if err := writeYAMLToFile(appconfigPath, &appNode); err != nil { return fmt.Errorf("failed to write appconfig: %w", err) } return nil } // mapCatalogerToEcosystem determines which ecosystem file a cataloger belongs to func mapCatalogerToEcosystem(cat capabilities.CatalogerEntry) string { // first try using the source file path if cat.Source.File != "" { // extract directory from path like "syft/pkg/cataloger/golang/cataloger.go" re := regexp.MustCompile(`syft/pkg/cataloger/([^/]+)/`) if matches := re.FindStringSubmatch(cat.Source.File); len(matches) >= 2 { return matches[1] } } // fallback to inferring from cataloger name (from merge.go) return inferEcosystem(cat.Name) } // updateNodeTreeEcosystem updates an existing ecosystem YAML node tree func updateNodeTreeEcosystem(rootNode *yaml.Node, doc interface{}) error { var newNode yaml.Node if err := newNode.Encode(doc); err != nil { return err } var existingMapping *yaml.Node if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 { existingMapping = rootNode.Content[0] } else { existingMapping = rootNode } var newMapping *yaml.Node if newNode.Kind == yaml.DocumentNode && len(newNode.Content) > 0 { newMapping = newNode.Content[0] } else { newMapping = &newNode } if existingMapping.Kind == yaml.MappingNode && newMapping.Kind == yaml.MappingNode { existingMapping.Content = newMapping.Content } return nil } // updateNodeTreeAppConfig updates appconfig YAML node tree func updateNodeTreeAppConfig(rootNode *yaml.Node, doc interface{}) error { return updateNodeTreeEcosystem(rootNode, doc) } // addCatalogerFieldComment adds appropriate comment to a single cataloger field func addCatalogerFieldComment(keyNode, valueNode *yaml.Node, catalogerName string) { switch keyNode.Value { case "ecosystem": // ecosystem is MANUAL if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL" } case "name", "type": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "source": // add AUTO-GENERATED comment to source field if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "config": // add AUTO-GENERATED comment to config field if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "selectors": // selectors are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "parsers": // parsers is AUTO-GENERATED structure if keyNode.LineComment == "" { keyNode.LineComment = "AUTO-GENERATED structure" } addParserComments(valueNode) case "detectors": // detectors are AUTO-GENERATED for binary-classifier-cataloger, MANUAL for others if catalogerName == "binary-classifier-cataloger" { keyNode.LineComment = autoGeneratedComment } else if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - edit detectors here" } case "metadata_types": // cataloger-level metadata_types (for custom catalogers) are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "package_types": // cataloger-level package_types (for custom catalogers) are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "json_schema_types": // json_schema_types are AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "capabilities": // capabilities are MANUAL if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - edit capabilities here" } } } // findFieldValue finds a field in a YAML mapping node and returns its value func findFieldValue(node *yaml.Node, fieldName string) string { if node.Kind != yaml.MappingNode { return "" } for i := 0; i < len(node.Content); i += 2 { keyNode := node.Content[i] valueNode := node.Content[i+1] if keyNode.Value == fieldName { return valueNode.Value } } return "" } func addCatalogerComments(catalogersNode *yaml.Node) { // catalogersNode should be a sequence of cataloger entries if catalogersNode.Kind != yaml.SequenceNode { return } for _, catalogerNode := range catalogersNode.Content { if catalogerNode.Kind != yaml.MappingNode { continue } // get the cataloger name for special handling catalogerName := findFieldValue(catalogerNode, "name") // iterate through the fields of this cataloger entry for i := 0; i < len(catalogerNode.Content); i += 2 { keyNode := catalogerNode.Content[i] valueNode := catalogerNode.Content[i+1] addCatalogerFieldComment(keyNode, valueNode, catalogerName) } } } func addParserComments(parsersNode *yaml.Node) { // parsersNode should be a sequence of parser entries if parsersNode.Kind != yaml.SequenceNode { return } for _, parserNode := range parsersNode.Content { if parserNode.Kind != yaml.MappingNode { continue } // iterate through the fields of this parser entry for i := 0; i < len(parserNode.Content); i += 2 { keyNode := parserNode.Content[i] valueNode := parserNode.Content[i+1] switch keyNode.Value { case "parser_function", "metadata_types", "package_types", "json_schema_types": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } case "detector": // detector is AUTO-GENERATED if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } addDetectorComments(valueNode) case "capabilities": // capabilities are MANUAL if keyNode.LineComment == "" { keyNode.LineComment = "MANUAL - preserved across regeneration" } } } } } func addDetectorComments(detectorNode *yaml.Node) { // detectorNode should be a mapping node with method and criteria fields if detectorNode.Kind != yaml.MappingNode { return } // iterate through the fields of the detector for i := 0; i < len(detectorNode.Content); i += 2 { keyNode := detectorNode.Content[i] switch keyNode.Value { case "method", "criteria": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } } } }