Alex Goodman 6edb1162c8 split packages.yaml to multiple files by go package
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-11-18 13:28:18 -05:00

487 lines
15 KiB
Go

// this file handles YAML file reading and writing with comment preservation, using gopkg.in/yaml.v3's node tree to maintain all existing comments during regeneration.
package main
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"gopkg.in/yaml.v3"
"github.com/anchore/syft/internal/capabilities"
)
// autoGeneratedComment is the line comment text attached to YAML keys that are marked as auto-generated.
const autoGeneratedComment = "AUTO-GENERATED"
// loadCapabilities loads the capabilities document from the YAML files under capabilitiesDir.
//
// Every packages/*.yaml file is parsed and merged into a single document (configs are merged by key,
// catalogers are appended in glob order). If appconfig.yaml exists it supplies the application config
// section; a missing appconfig.yaml is not an error, but any other failure to read it is reported.
//
// Returns the merged document together with the original YAML node tree of each file so that comments
// can be preserved when the files are written back. Node trees are keyed by the file's basename without
// the .yaml extension, and by "appconfig" for appconfig.yaml.
func loadCapabilities(capabilitiesDir string) (*capabilities.Document, map[string]*yaml.Node, error) {
	packagesDir := filepath.Join(capabilitiesDir, "packages")

	// load all packages/*.yaml files
	files, err := filepath.Glob(filepath.Join(packagesDir, "*.yaml"))
	if err != nil {
		return nil, nil, fmt.Errorf("failed to glob packages directory: %w", err)
	}

	mergedDoc := &capabilities.Document{
		Configs:    make(map[string]capabilities.CatalogerConfigEntry),
		Catalogers: []capabilities.CatalogerEntry{},
	}
	nodeMap := make(map[string]*yaml.Node)

	// load each package file
	for _, file := range files {
		data, err := os.ReadFile(file)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to read %s: %w", file, err)
		}

		// parse into node tree to preserve comments; rootNode is declared inside the loop body so
		// every file gets its own node and the stored pointers never alias each other
		var rootNode yaml.Node
		if err := yaml.Unmarshal(data, &rootNode); err != nil {
			return nil, nil, fmt.Errorf("failed to parse %s into node tree: %w", file, err)
		}

		// parse into struct
		var doc struct {
			Configs    map[string]capabilities.CatalogerConfigEntry `yaml:"configs"`
			Catalogers []capabilities.CatalogerEntry                `yaml:"catalogers"`
		}
		if err := yaml.Unmarshal(data, &doc); err != nil {
			return nil, nil, fmt.Errorf("failed to parse %s into struct: %w", file, err)
		}

		// merge configs (a later file wins when the same key appears twice)
		for k, v := range doc.Configs {
			mergedDoc.Configs[k] = v
		}

		// merge catalogers
		mergedDoc.Catalogers = append(mergedDoc.Catalogers, doc.Catalogers...)

		// store node tree by filename (basename without .yaml)
		ecosystem := strings.TrimSuffix(filepath.Base(file), ".yaml")
		nodeMap[ecosystem] = &rootNode
	}

	// load appconfig.yaml separately. The file is read directly instead of being stat'ed first:
	// this avoids a check-then-use race and ensures that only "file does not exist" is treated
	// as "no application config", while other failures (e.g. permissions) are surfaced.
	appconfigPath := filepath.Join(capabilitiesDir, "appconfig.yaml")
	data, err := os.ReadFile(appconfigPath)
	if err != nil {
		if os.IsNotExist(err) {
			return mergedDoc, nodeMap, nil
		}
		return nil, nil, fmt.Errorf("failed to read appconfig.yaml: %w", err)
	}

	var appDoc struct {
		Application []capabilities.ApplicationConfigField `yaml:"application"`
	}
	if err := yaml.Unmarshal(data, &appDoc); err != nil {
		return nil, nil, fmt.Errorf("failed to parse appconfig.yaml: %w", err)
	}
	mergedDoc.ApplicationConfig = appDoc.Application

	// load node tree for appconfig
	var appNode yaml.Node
	if err := yaml.Unmarshal(data, &appNode); err != nil {
		return nil, nil, fmt.Errorf("failed to parse appconfig.yaml into node tree: %w", err)
	}
	nodeMap["appconfig"] = &appNode

	return mergedDoc, nodeMap, nil
}
// writeYAMLToFile encodes rootNode as YAML using a 2-space indent and writes it to path,
// creating the file or truncating it if it already exists.
//
// An error is returned if the file cannot be created, the node cannot be encoded, the encoder
// cannot be closed, or the file itself cannot be closed. The file close error is checked because
// a failed close on a freshly written file can mean the data was not fully persisted.
func writeYAMLToFile(path string, rootNode *yaml.Node) (err error) {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	defer func() {
		// report a close failure, but never mask an earlier (more specific) error
		if closeErr := f.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close file: %w", closeErr)
		}
	}()

	encoder := yaml.NewEncoder(f)
	encoder.SetIndent(2)

	if err := encoder.Encode(rootNode); err != nil {
		return fmt.Errorf("failed to write YAML: %w", err)
	}

	if err := encoder.Close(); err != nil {
		return fmt.Errorf("failed to close encoder: %w", err)
	}

	return nil
}
// addFieldComments annotates the top-level keys of a capabilities YAML tree.
//
// rootNode may be a DocumentNode (in which case its first child is used) or the mapping itself.
// The "configs" and "application" keys receive an AUTO-GENERATED line comment unless they already
// carry one. For "catalogers", any head comment on the key and value is cleared (the only header
// should be on the root DocumentNode) and each cataloger entry is annotated via addCatalogerComments.
func addFieldComments(rootNode *yaml.Node) {
	// line comments for sections that are entirely generated
	sectionComments := map[string]string{
		"configs":     "AUTO-GENERATED - config structs and their fields",
		"application": "AUTO-GENERATED - application-level config keys",
	}

	// unwrap the document node, if any, to reach the top-level mapping
	top := rootNode
	if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
		top = rootNode.Content[0]
	}
	if top == nil {
		return
	}

	// mapping content is laid out as alternating key/value nodes
	for idx := 0; idx < len(top.Content); idx += 2 {
		key, value := top.Content[idx], top.Content[idx+1]

		if comment, known := sectionComments[key.Value]; known {
			if key.LineComment == "" {
				key.LineComment = comment
			}
			continue
		}

		if key.Value == "catalogers" {
			// prevent duplicate headers: drop head comments on both the key and the value
			key.HeadComment = ""
			value.HeadComment = ""
			addCatalogerComments(value)
		}
	}
}
// saveCapabilities writes the capabilities document out as distributed YAML files with comments.
// Catalogers are grouped by ecosystem and each group is written to packages/ECOSYSTEM.yaml under
// capabilitiesDir; when the document has application config entries, appconfig.yaml is written too.
// The first write failure aborts the save and is returned.
func saveCapabilities(capabilitiesDir string, doc *capabilities.Document, existingNodes map[string]*yaml.Node) error {
	grouped, groupedConfigs := groupCatalogersByEcosystem(doc)
	targetDir := filepath.Join(capabilitiesDir, "packages")

	// one file per ecosystem
	for name, entries := range grouped {
		err := writeEcosystemFile(targetDir, name, entries, groupedConfigs[name], existingNodes)
		if err != nil {
			return err
		}
	}

	// appconfig.yaml is only written when there is something to put in it
	if len(doc.ApplicationConfig) == 0 {
		return nil
	}
	return writeAppconfigFile(capabilitiesDir, doc.ApplicationConfig, existingNodes)
}
// groupCatalogersByEcosystem buckets the document's catalogers by the ecosystem reported by
// mapCatalogerToEcosystem. The second result holds, per ecosystem, the config entries referenced
// by that ecosystem's catalogers (only configs that exist in doc.Configs are included).
func groupCatalogersByEcosystem(doc *capabilities.Document) (map[string][]capabilities.CatalogerEntry, map[string]map[string]capabilities.CatalogerConfigEntry) {
	byEcosystem := map[string][]capabilities.CatalogerEntry{}
	configsFor := map[string]map[string]capabilities.CatalogerConfigEntry{}

	for _, entry := range doc.Catalogers {
		name := mapCatalogerToEcosystem(entry)
		byEcosystem[name] = append(byEcosystem[name], entry)

		// catalogers without a config reference contribute nothing to the config grouping
		if entry.Config == "" {
			continue
		}
		cfg, found := doc.Configs[entry.Config]
		if !found {
			continue
		}

		// lazily create the per-ecosystem config bucket
		bucket := configsFor[name]
		if bucket == nil {
			bucket = make(map[string]capabilities.CatalogerConfigEntry)
			configsFor[name] = bucket
		}
		bucket[entry.Config] = cfg
	}

	return byEcosystem, configsFor
}
// writeEcosystemFile writes one ecosystem's catalogers and configs to packagesDir/ECOSYSTEM.yaml.
// When existingNodes holds a non-nil node tree for the ecosystem, that tree is copied and updated
// with the new content; otherwise a fresh tree is encoded. In both cases the file header comment is
// (re)set and field comments are added before the file is written.
func writeEcosystemFile(packagesDir, ecosystem string, catalogers []capabilities.CatalogerEntry, configs map[string]capabilities.CatalogerConfigEntry, existingNodes map[string]*yaml.Node) error {
	var payload struct {
		Configs    map[string]capabilities.CatalogerConfigEntry `yaml:"configs,omitempty"`
		Catalogers []capabilities.CatalogerEntry                `yaml:"catalogers"`
	}
	payload.Configs = configs
	payload.Catalogers = catalogers

	var root yaml.Node
	if prior := existingNodes[ecosystem]; prior != nil {
		// start from a copy of the previously loaded tree and swap in the new content
		root = *prior
		if err := updateNodeTreeEcosystem(&root, &payload); err != nil {
			return fmt.Errorf("failed to update node tree for %s: %w", ecosystem, err)
		}
	} else if err := root.Encode(&payload); err != nil {
		// no previous tree: build one from scratch
		return fmt.Errorf("failed to encode %s: %w", ecosystem, err)
	}

	// the header is applied last because encoding a fresh tree overwrites the whole node
	root.HeadComment = "# Cataloger capabilities. See ../README.md for documentation.\n"

	addFieldComments(&root)

	target := filepath.Join(packagesDir, ecosystem+".yaml")
	if err := writeYAMLToFile(target, &root); err != nil {
		return fmt.Errorf("failed to write %s: %w", ecosystem, err)
	}
	return nil
}
// writeAppconfigFile writes the application config fields to capabilitiesDir/appconfig.yaml.
// A non-nil "appconfig" entry in existingNodes is copied and updated with the new content;
// otherwise a fresh tree is encoded and given the default file header. Field comments are added
// before the file is written.
func writeAppconfigFile(capabilitiesDir string, appConfig []capabilities.ApplicationConfigField, existingNodes map[string]*yaml.Node) error {
	var payload struct {
		Application []capabilities.ApplicationConfigField `yaml:"application"`
	}
	payload.Application = appConfig

	var root yaml.Node
	if prior := existingNodes["appconfig"]; prior != nil {
		// reuse the previously loaded tree; its header comment is kept as-is
		root = *prior
		if err := updateNodeTreeAppConfig(&root, &payload); err != nil {
			return fmt.Errorf("failed to update appconfig node tree: %w", err)
		}
	} else {
		if err := root.Encode(&payload); err != nil {
			return fmt.Errorf("failed to encode appconfig: %w", err)
		}
		// only a brand-new file gets the default header
		root.HeadComment = "# Application-level configuration. See README.md for documentation.\n# This file is partially auto-generated. Run 'go generate ./internal/capabilities' to regenerate.\n"
	}

	addFieldComments(&root)

	target := filepath.Join(capabilitiesDir, "appconfig.yaml")
	if err := writeYAMLToFile(target, &root); err != nil {
		return fmt.Errorf("failed to write appconfig: %w", err)
	}
	return nil
}
// catalogerSourceDirPattern captures the directory directly below syft/pkg/cataloger/ in a source
// file path, e.g. "golang" in "syft/pkg/cataloger/golang/cataloger.go". It is compiled once at
// package scope instead of on every mapCatalogerToEcosystem call.
var catalogerSourceDirPattern = regexp.MustCompile(`syft/pkg/cataloger/([^/]+)/`)

// mapCatalogerToEcosystem determines which ecosystem file a cataloger belongs to.
// The ecosystem is taken from the cataloger's source file path when that path contains a
// syft/pkg/cataloger/<dir>/ segment; otherwise it falls back to inferEcosystem on the cataloger name.
func mapCatalogerToEcosystem(cat capabilities.CatalogerEntry) string {
	// first try using the source file path
	if cat.Source.File != "" {
		if matches := catalogerSourceDirPattern.FindStringSubmatch(cat.Source.File); len(matches) >= 2 {
			return matches[1]
		}
	}

	// fallback to inferring from cataloger name (from merge.go)
	return inferEcosystem(cat.Name)
}
// updateNodeTreeEcosystem replaces the top-level mapping content of an existing YAML node tree
// with the freshly encoded form of doc, leaving the root node itself untouched.
//
// rootNode may be a DocumentNode wrapping the mapping, or the mapping node itself.
// An error is returned if doc cannot be encoded, or if either the existing tree or the encoded doc
// does not have a mapping at its top level (for example when the existing file was empty). The
// mismatch is reported rather than ignored so that regenerated content is never silently dropped.
func updateNodeTreeEcosystem(rootNode *yaml.Node, doc interface{}) error {
	var newNode yaml.Node
	if err := newNode.Encode(doc); err != nil {
		return err
	}

	// unwrap document nodes on both sides to reach the top-level mappings
	existingMapping := rootNode
	if rootNode.Kind == yaml.DocumentNode && len(rootNode.Content) > 0 {
		existingMapping = rootNode.Content[0]
	}

	newMapping := &newNode
	if newNode.Kind == yaml.DocumentNode && len(newNode.Content) > 0 {
		newMapping = newNode.Content[0]
	}

	if existingMapping.Kind != yaml.MappingNode || newMapping.Kind != yaml.MappingNode {
		return fmt.Errorf("cannot update YAML node tree: expected top-level mapping nodes, got kind %d (existing) and kind %d (new)", existingMapping.Kind, newMapping.Kind)
	}

	// swap in the new key/value nodes while keeping the existing mapping (and root) node
	existingMapping.Content = newMapping.Content
	return nil
}
// updateNodeTreeAppConfig updates the appconfig YAML node tree with the encoded form of doc.
// It delegates directly to updateNodeTreeEcosystem, so the appconfig tree is updated in exactly
// the same way as an ecosystem tree.
func updateNodeTreeAppConfig(rootNode *yaml.Node, doc interface{}) error {
return updateNodeTreeEcosystem(rootNode, doc)
}
// addCatalogerFieldComment attaches the appropriate line comment to one key of a cataloger entry.
//
// Keys are labelled as AUTO-GENERATED or MANUAL depending on the field; an existing line comment
// is left alone, except for "detectors" on the binary-classifier-cataloger, which is always
// overwritten with the AUTO-GENERATED marker. For "parsers" the nested parser entries are also
// annotated via addParserComments. Unrecognized keys are left untouched.
func addCatalogerFieldComment(keyNode, valueNode *yaml.Node, catalogerName string) {
	var (
		comment   string
		overwrite bool
	)

	switch keyNode.Value {
	case "ecosystem":
		comment = "MANUAL"
	case "name", "type", "source", "config", "selectors",
		"metadata_types", "package_types", "json_schema_types":
		// plain generated fields (metadata_types/package_types appear at cataloger level for custom catalogers)
		comment = autoGeneratedComment
	case "parsers":
		comment = "AUTO-GENERATED structure"
	case "detectors":
		// generated for the binary-classifier-cataloger, hand-maintained for everything else
		if catalogerName == "binary-classifier-cataloger" {
			comment, overwrite = autoGeneratedComment, true
		} else {
			comment = "MANUAL - edit detectors here"
		}
	case "capabilities":
		comment = "MANUAL - edit capabilities here"
	default:
		return
	}

	if overwrite || keyNode.LineComment == "" {
		keyNode.LineComment = comment
	}

	// parser entries carry their own per-field comments
	if keyNode.Value == "parsers" {
		addParserComments(valueNode)
	}
}
// findFieldValue looks up fieldName among the keys of a YAML mapping node and returns the Value
// of the matching value node. It returns "" when node is not a mapping or the key is absent.
func findFieldValue(node *yaml.Node, fieldName string) string {
	if node.Kind == yaml.MappingNode {
		// mapping content alternates key, value, key, value, ...
		pairs := node.Content
		for idx := 0; idx < len(pairs); idx += 2 {
			key, value := pairs[idx], pairs[idx+1]
			if key.Value == fieldName {
				return value.Value
			}
		}
	}
	return ""
}
// addCatalogerComments annotates every cataloger entry in a "catalogers" sequence node.
// Nothing happens unless catalogersNode is a sequence; non-mapping items in it are skipped.
// Each key of each entry is passed to addCatalogerFieldComment together with the entry's name.
func addCatalogerComments(catalogersNode *yaml.Node) {
	if catalogersNode.Kind != yaml.SequenceNode {
		return
	}

	for _, entry := range catalogersNode.Content {
		if entry.Kind == yaml.MappingNode {
			// the name drives special handling for individual catalogers
			name := findFieldValue(entry, "name")

			// walk the entry's alternating key/value nodes
			fields := entry.Content
			for idx := 0; idx < len(fields); idx += 2 {
				key, value := fields[idx], fields[idx+1]
				addCatalogerFieldComment(key, value, name)
			}
		}
	}
}
// addParserComments annotates every parser entry in a "parsers" sequence node.
// Nothing happens unless parsersNode is a sequence; non-mapping items in it are skipped.
// Generated fields receive the AUTO-GENERATED marker and "capabilities" is marked MANUAL, in both
// cases only when the key has no line comment yet. The "detector" value is further annotated via
// addDetectorComments.
func addParserComments(parsersNode *yaml.Node) {
	if parsersNode.Kind != yaml.SequenceNode {
		return
	}

	for _, parser := range parsersNode.Content {
		if parser.Kind != yaml.MappingNode {
			continue
		}

		// walk the parser's alternating key/value nodes
		fields := parser.Content
		for idx := 0; idx < len(fields); idx += 2 {
			key, value := fields[idx], fields[idx+1]

			var comment string
			switch key.Value {
			case "parser_function", "metadata_types", "package_types", "json_schema_types", "detector":
				comment = autoGeneratedComment
			case "capabilities":
				comment = "MANUAL - preserved across regeneration"
			default:
				// unrecognized keys are left untouched
				continue
			}

			if key.LineComment == "" {
				key.LineComment = comment
			}

			// the detector mapping has its own fields to annotate
			if key.Value == "detector" {
				addDetectorComments(value)
			}
		}
	}
}
// addDetectorComments marks the "method" and "criteria" keys of a detector mapping node as
// AUTO-GENERATED, unless they already have a line comment. Non-mapping nodes are ignored.
func addDetectorComments(detectorNode *yaml.Node) {
	if detectorNode.Kind != yaml.MappingNode {
		return
	}

	// keys sit at the even positions of the mapping content
	fields := detectorNode.Content
	for idx := 0; idx < len(fields); idx += 2 {
		key := fields[idx]
		isGenerated := key.Value == "method" || key.Value == "criteria"
		if isGenerated && key.LineComment == "" {
			key.LineComment = autoGeneratedComment
		}
	}
}