syft/internal/capabilities/generate/metadata_discovery.go
Alex Goodman 1510db7c4e add info command from generated capabilities
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-10-13 17:14:40 -04:00

332 lines
11 KiB
Go

package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"github.com/anchore/syft/internal/capabilities/pkgtestobservation"
)
// TestObservationIndex indexes all test observations for efficient lookup and application.
// parser-level observations are indexed by package name (from test file) + parser function,
// while cataloger-level observations are indexed by cataloger name.
//
// both maps must be allocated before the set* methods are used, since writing to a nil map
// panics; newTestObservationIndex returns an index with both maps ready for use.
type TestObservationIndex struct {
	// parser-level observations: package -> parser function -> types
	parsersByPackage map[string]map[string]*TypeObservation
	// cataloger-level observations: cataloger name -> types
	catalogers map[string]*TypeObservation
}
// TypeObservation pairs the metadata types with the package types recorded for a single
// parser or cataloger. both are held as plain string slices; a zero value (both slices nil)
// means nothing has been observed.
type TypeObservation struct {
	MetadataTypes, PackageTypes []string
}
// newTestObservationIndex returns an index with both lookup maps allocated and empty,
// so observations can be stored right away.
func newTestObservationIndex() *TestObservationIndex {
	idx := new(TestObservationIndex)
	idx.parsersByPackage = map[string]map[string]*TypeObservation{}
	idx.catalogers = map[string]*TypeObservation{}
	return idx
}
// getParserObservations looks up the parser-level observations stored for the given
// package name and parser function. it returns nil when either the package or the
// parser function has no entry.
func (idx *TestObservationIndex) getParserObservations(packageName, parserFunction string) *TypeObservation {
	byFunction, found := idx.parsersByPackage[packageName]
	if !found {
		return nil
	}
	// an unknown parser function yields the map's zero value, which is a nil pointer
	return byFunction[parserFunction]
}
// getCatalogerObservations looks up the cataloger-level observations stored for the given
// cataloger name, returning nil when the name has no entry.
func (idx *TestObservationIndex) getCatalogerObservations(catalogerName string) *TypeObservation {
	if obs, found := idx.catalogers[catalogerName]; found {
		return obs
	}
	return nil
}
// setParserObservations stores parser-level observations under the given package name and
// parser function, replacing any previous entry. the per-package map is created the first
// time a package name is seen.
func (idx *TestObservationIndex) setParserObservations(packageName, parserFunction string, obs *TypeObservation) {
	byFunction := idx.parsersByPackage[packageName]
	if byFunction == nil {
		byFunction = make(map[string]*TypeObservation)
		idx.parsersByPackage[packageName] = byFunction
	}
	byFunction[parserFunction] = obs
}
// setCatalogerObservations stores cataloger-level observations under the given cataloger name,
// replacing any entry previously stored for that name. the pointer is stored as-is (no copy is made).
func (idx *TestObservationIndex) setCatalogerObservations(catalogerName string, obs *TypeObservation) {
	idx.catalogers[catalogerName] = obs
}
// extractCustomCatalogerData returns the cataloger-level observations held by the index as two
// maps keyed by cataloger name: the first holds metadata types, the second package types.
// every cataloger entry in the index is considered; a cataloger is left out of a map when it
// has no types of that kind. both maps are always non-nil, and the slices they hold are the
// same slices referenced by the index (they are not copied).
func (idx *TestObservationIndex) extractCustomCatalogerData() (map[string][]string, map[string][]string) {
	metadataByCataloger := map[string][]string{}
	packageTypesByCataloger := map[string][]string{}

	for name, observed := range idx.catalogers {
		if types := observed.MetadataTypes; len(types) != 0 {
			metadataByCataloger[name] = types
		}
		if types := observed.PackageTypes; len(types) != 0 {
			packageTypesByCataloger[name] = types
		}
	}

	return metadataByCataloger, packageTypesByCataloger
}
// findTestFixtureDirs walks the cataloger directory tree (<repoRoot>/syft/pkg/cataloger) and
// returns the path of every directory named "test-fixtures" found beneath it.
//
// paths are returned in the order the walk visits them (lexical within each directory). the
// walk keeps descending into a matched directory, so a "test-fixtures" directory nested inside
// another one is reported as well. non-directory entries named "test-fixtures" are ignored.
//
// any error met during the walk (including a missing cataloger directory) aborts the walk; in
// that case a nil slice is returned along with the wrapped error.
func findTestFixtureDirs(repoRoot string) ([]string, error) {
	catalogerRoot := filepath.Join(repoRoot, "syft", "pkg", "cataloger")

	var testFixtureDirs []string

	// only the name and directory-ness of each entry is needed, so WalkDir is used rather than
	// Walk: it avoids an extra lstat call for every visited file
	err := filepath.WalkDir(catalogerRoot, func(path string, entry os.DirEntry, err error) error {
		if err != nil {
			// entry may be nil here (e.g. the root could not be read), so bail out before touching it
			return err
		}
		if entry.IsDir() && entry.Name() == "test-fixtures" {
			testFixtureDirs = append(testFixtureDirs, path)
		}
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to walk cataloger directory: %w", err)
	}

	return testFixtureDirs, nil
}
// discoverMetadataTypes looks for a test-observations.json file in every test-fixtures directory
// under the cataloger tree, aggregates what it finds into a single index, and applies the result
// to the discovered catalogers (the entries of the given map are updated in place).
//
// the two returned maps are the cataloger-level metadata types and package types from the index,
// keyed by cataloger name. when no observations were found at all, both maps are nil and the
// discovered catalogers are left untouched; this is not treated as an error.
//
// a missing observations file is skipped silently. a file that cannot be read or parsed is
// reported as a warning on stdout and skipped. the only error returned is a failure to walk
// the cataloger tree.
func discoverMetadataTypes(repoRoot string, discovered map[string]DiscoveredCataloger) (map[string][]string, map[string][]string, error) {
	fixtureDirs, err := findTestFixtureDirs(repoRoot)
	if err != nil {
		return nil, nil, err
	}

	// aggregate the observations from every file into one index
	index := newTestObservationIndex()
	for _, fixtureDir := range fixtureDirs {
		observationsFile := filepath.Join(fixtureDir, "test-observations.json")

		observations, readErr := readTestObservations(observationsFile)
		switch {
		case readErr == nil:
			mergeTestObservations(observations, index)
		case os.IsNotExist(readErr):
			// no observations file in this fixture directory: nothing to merge, nothing to report
		default:
			fmt.Printf(" Warning: failed to read %s: %v\n", observationsFile, readErr)
		}
	}

	// nothing observed anywhere: not an error, but there is nothing to apply either
	if len(index.parsersByPackage)+len(index.catalogers) == 0 {
		return nil, nil, nil
	}

	applyTypesToCatalogers(discovered, index)

	catalogerMetadata, catalogerPackageTypes := index.extractCustomCatalogerData()
	return catalogerMetadata, catalogerPackageTypes, nil
}
// readTestObservations loads the file at the given path and decodes its JSON content into a
// pkgtestobservation.Test. an error from reading the file is returned unchanged (callers check
// it with os.IsNotExist), while a decoding error is wrapped with a "failed to parse JSON" prefix.
func readTestObservations(path string) (*pkgtestobservation.Test, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	observations := new(pkgtestobservation.Test)
	if err = json.Unmarshal(raw, observations); err != nil {
		return nil, fmt.Errorf("failed to parse JSON: %w", err)
	}

	return observations, nil
}
// mergeAndDeduplicateStrings returns the union of the two given slices as a new slice, sorted
// in ascending order with each distinct value appearing once. neither input is modified, and
// the result is never nil (it is an empty slice when both inputs are empty).
func mergeAndDeduplicateStrings(existing, additional []string) []string {
	// work on a private copy so that sorting does not disturb the callers' slices
	combined := make([]string, 0, len(existing)+len(additional))
	combined = append(combined, existing...)
	combined = append(combined, additional...)
	sort.Strings(combined)

	// after sorting, duplicates sit next to each other: keep only the first of each run
	unique := combined[:0]
	for i, value := range combined {
		if i == 0 || value != combined[i-1] {
			unique = append(unique, value)
		}
	}
	return unique
}
// mergeTestObservations folds the metadata and package types from one test-observations.json
// file into the observation index. parser-level entries are stored under the file's package
// name + parser name, cataloger-level entries under the cataloger name. entries with neither
// metadata types nor package types are skipped. types merged into an existing entry are kept
// sorted and deduplicated.
func mergeTestObservations(observations *pkgtestobservation.Test, index *TestObservationIndex) {
	// fold combines newly observed types with whatever has been recorded so far (if anything)
	fold := func(recorded *TypeObservation, metadataTypes, packageTypes []string) *TypeObservation {
		if recorded == nil {
			recorded = &TypeObservation{}
		}
		recorded.MetadataTypes = mergeAndDeduplicateStrings(recorded.MetadataTypes, metadataTypes)
		recorded.PackageTypes = mergeAndDeduplicateStrings(recorded.PackageTypes, packageTypes)
		return recorded
	}

	packageName := observations.Package

	// parser-level observations
	for parserFunction, observed := range observations.Parsers {
		if len(observed.MetadataTypes)+len(observed.PackageTypes) == 0 {
			continue
		}
		merged := fold(index.getParserObservations(packageName, parserFunction), observed.MetadataTypes, observed.PackageTypes)
		index.setParserObservations(packageName, parserFunction, merged)
	}

	// cataloger-level observations
	for catalogerName, observed := range observations.Catalogers {
		if len(observed.MetadataTypes)+len(observed.PackageTypes) == 0 {
			continue
		}
		merged := fold(index.getCatalogerObservations(catalogerName), observed.MetadataTypes, observed.PackageTypes)
		index.setCatalogerObservations(catalogerName, merged)
	}
}
// applyParserObservations copies parser-level observations from the index onto the cataloger's
// parsers, matching on the cataloger's package name + each parser's function. a parser's
// metadata types / package types are overwritten only when the matching observation has a
// non-empty list of that kind. the observation's slices are assigned directly (not copied).
// returns true when at least one parser field was set.
func applyParserObservations(cataloger *DiscoveredCataloger, index *TestObservationIndex) bool {
	applied := false

	for i := range cataloger.Parsers {
		obs := index.getParserObservations(cataloger.PackageName, cataloger.Parsers[i].ParserFunction)
		if obs == nil {
			// nothing was observed for this parser
			continue
		}

		if len(obs.MetadataTypes) != 0 {
			cataloger.Parsers[i].MetadataTypes = obs.MetadataTypes
			applied = true
		}
		if len(obs.PackageTypes) != 0 {
			cataloger.Parsers[i].PackageTypes = obs.PackageTypes
			applied = true
		}
	}

	return applied
}
// applySingleParserCatalogerObservations applies cataloger-level observations to a cataloger
// with a single parser: each non-empty list in the observation is merged (sorted, deduplicated)
// into the corresponding list of the first parser. returns true when anything was merged.
//
// the first parser is indexed without a length check, so the caller must only pass catalogers
// that have at least one parser.
func applySingleParserCatalogerObservations(cataloger *DiscoveredCataloger, catalogerObs *TypeObservation) bool {
	hasMetadataTypes := len(catalogerObs.MetadataTypes) > 0
	hasPackageTypes := len(catalogerObs.PackageTypes) > 0

	if hasMetadataTypes {
		cataloger.Parsers[0].MetadataTypes = mergeAndDeduplicateStrings(cataloger.Parsers[0].MetadataTypes, catalogerObs.MetadataTypes)
	}
	if hasPackageTypes {
		cataloger.Parsers[0].PackageTypes = mergeAndDeduplicateStrings(cataloger.Parsers[0].PackageTypes, catalogerObs.PackageTypes)
	}

	return hasMetadataTypes || hasPackageTypes
}
// applyMultiParserCatalogerObservations applies cataloger-level observations to a cataloger with
// multiple parsers, using them only to fill gaps:
//   - when no parser has any data, a warning is printed to stdout and the observation is applied
//     to every parser.
//   - when some (but not all) parsers have no data, the observation fills in each list that is
//     still empty. note that this is decided per list: a parser that has metadata types but no
//     package types (or the reverse) gets the missing list filled too.
//   - when every parser already has some data, nothing is applied.
//
// the observation's slices are assigned directly, so all parsers filled from it share them.
// returns true when at least one parser field was set.
func applyMultiParserCatalogerObservations(cataloger *DiscoveredCataloger, catalogerObs *TypeObservation) bool {
	// tally the parsers that have neither metadata types nor package types
	emptyParsers := 0
	for i := range cataloger.Parsers {
		if len(cataloger.Parsers[i].MetadataTypes)+len(cataloger.Parsers[i].PackageTypes) == 0 {
			emptyParsers++
		}
	}

	switch {
	case emptyParsers == len(cataloger.Parsers):
		fmt.Printf(" Warning: cataloger %q has %d parsers but only cataloger-level observations. Consider adding parser-level tests for better granularity.\n",
			cataloger.Name, len(cataloger.Parsers))
	case emptyParsers == 0:
		// every parser already carries data of its own
		return false
	}

	hasMetadataTypes := len(catalogerObs.MetadataTypes) > 0
	hasPackageTypes := len(catalogerObs.PackageTypes) > 0

	// fill whichever lists are still empty; when all parsers are empty this touches every parser
	applied := false
	for i := range cataloger.Parsers {
		if hasMetadataTypes && len(cataloger.Parsers[i].MetadataTypes) == 0 {
			cataloger.Parsers[i].MetadataTypes = catalogerObs.MetadataTypes
			applied = true
		}
		if hasPackageTypes && len(cataloger.Parsers[i].PackageTypes) == 0 {
			cataloger.Parsers[i].PackageTypes = catalogerObs.PackageTypes
			applied = true
		}
	}

	return applied
}
// applyTypesToCatalogers applies the aggregated metadata and package type data to the discovered
// catalogers. for each cataloger, parser-level observations are applied first; cataloger-level
// observations (looked up by the map key) are applied afterwards, and only when the cataloger has
// at least one parser. the loop works on a copy of each map value, so the entry is stored back
// into the map whenever something was applied.
func applyTypesToCatalogers(discovered map[string]DiscoveredCataloger, index *TestObservationIndex) {
	for name, cataloger := range discovered {
		// parser-level observations always go first
		changed := applyParserObservations(&cataloger, index)

		catalogerObs := index.getCatalogerObservations(name)
		parserCount := len(cataloger.Parsers)

		switch {
		case catalogerObs == nil || parserCount == 0:
			// no cataloger-level data, or no parser to attach it to
		case parserCount == 1:
			// single parser: merge cataloger-level with parser-level observations
			changed = applySingleParserCatalogerObservations(&cataloger, catalogerObs) || changed
		default:
			// multiple parsers: cataloger-level observations only fill in what is missing
			changed = applyMultiParserCatalogerObservations(&cataloger, catalogerObs) || changed
		}

		if changed {
			discovered[name] = cataloger
		}
	}
}