package main

import (
	"fmt"
	"strings"

	"github.com/scylladb/go-set/strset"

	"github.com/anchore/syft/internal/capabilities"
	"github.com/anchore/syft/internal/packagemetadata"
	"github.com/anchore/syft/syft/pkg/cataloger/binary"
)

const genericCatalogerType = "generic"

// stripPURLVersion removes the @version suffix from a PURL string
// e.g., "pkg:generic/python@version" -> "pkg:generic/python"
func stripPURLVersion(purl string) string {
	if idx := strings.LastIndex(purl, "@"); idx != -1 {
		return purl[:idx]
	}
	return purl
}
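
// Illustrative behavior (follows directly from the implementation above):
//
//	stripPURLVersion("pkg:generic/python@3.11") // "pkg:generic/python"
//	stripPURLVersion("pkg:generic/python")      // unchanged - no "@" present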

// catalogerTypeOverrides specifies catalogers that should have their type manually controlled
// rather than determined from the discovered cataloger structure.
// This is useful when a cataloger is discovered as "generic" but should be treated as "custom"
// in the YAML (or vice versa).
var catalogerTypeOverrides = map[string]string{
	// the java cataloger is technically generic (it has parsers), but we want it to be treated as custom since
	// these nuances can't automatically be detected, and it requires manual source info
	"java-archive-cataloger": "custom",
}

// catalogerConfigExceptions specifies catalogers that should NOT have config fields
// auto-generated even if a config mapping is discovered via AST parsing.
// This is useful when a cataloger uses a config struct internally but it shouldn't
// be exposed in the capabilities document.
var catalogerConfigExceptions = strset.New(
	"binary-classifier-cataloger",
)

// catalogerConfigOverrides specifies manual mappings from cataloger names to config struct names.
// Use this when the AST parser cannot automatically discover the config linkage, or when you want
// to explicitly override the discovered mapping.
// Format: cataloger-name -> "package.ConfigStructName"
var catalogerConfigOverrides = map[string]string{
	"dotnet-portable-executable-cataloger": "dotnet.CatalogerConfig",
	"nix-store-cataloger":                  "nix.Config",
}
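
// Note: these overrides are merged on top of the AST-discovered mappings in
// RegenerateCapabilities (step 3c-2 below), so an entry here always wins over a
// discovered linkage - unless the cataloger is also listed in
// catalogerConfigExceptions, in which case it is skipped entirely.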

// Statistics contains information about the regeneration process
type Statistics struct {
	TotalGenericCatalogers int
	TotalCustomCatalogers  int
	TotalParserFunctions   int
	NewCatalogers          []string
	NewParserFunctions     []string
	UpdatedCatalogers      []string
}

// RegenerateCapabilities updates the YAML file with discovered catalogers
// while preserving manually-edited capability information.
// This is exported for use by the generator in generate/main.go
func RegenerateCapabilities(yamlPath string, repoRoot string) (*Statistics, error) {
	stats := &Statistics{}

	// 1. Discover generic catalogers from code
	fmt.Print(" → Scanning source code for generic catalogers...")
	discovered, err := discoverGenericCatalogers(repoRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to discover catalogers: %w", err)
	}
	stats.TotalGenericCatalogers = len(discovered)
	fmt.Printf(" found %d\n", stats.TotalGenericCatalogers)

	// 1a. Discover metadata types and package types from test-generated JSON files
	fmt.Print(" → Searching for metadata type and package type information...")
	customCatalogerMetadata, customCatalogerPackageTypes, err := discoverMetadataTypes(repoRoot, discovered)
	if err != nil {
		return nil, fmt.Errorf("failed to discover metadata types: %w", err)
	}
	fmt.Println(" done")

	// 1b. Extract binary classifiers
	fmt.Print(" → Extracting binary classifiers...")
	binaryClassifiers := extractBinaryClassifiers()
	fmt.Printf(" found %d classifiers\n", len(binaryClassifiers))

	// count parser functions
	for _, disc := range discovered {
		stats.TotalParserFunctions += len(disc.Parsers)
	}

	// 2. Get all package cataloger info (names and selectors)
	fmt.Print(" → Fetching all cataloger info from syft...")
	allCatalogers, err := allPackageCatalogerInfo()
	if err != nil {
		return nil, fmt.Errorf("failed to get cataloger info: %w", err)
	}
	fmt.Printf(" found %d total\n", len(allCatalogers))

	// 3. Load the existing YAML (if it exists) - returns both the document and the node tree
	fmt.Print(" → Loading existing packages.yaml...")
	existing, existingNode, err := loadCapabilities(yamlPath)
	if err != nil {
		return nil, fmt.Errorf("failed to load existing capabilities: %w", err)
	}
	fmt.Printf(" loaded %d entries\n", len(existing.Catalogers))

	// 3a. Discover cataloger config structs
	fmt.Print(" → Discovering cataloger config structs...")
	configInfoMap, err := DiscoverConfigs(repoRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to discover configs: %w", err)
	}
	fmt.Printf(" found %d\n", len(configInfoMap))

	// 3a-1. Get the whitelist of allowed config structs from pkgcataloging.Config
	fmt.Print(" → Filtering configs by pkgcataloging.Config whitelist...")
	allowedConfigs, err := DiscoverAllowedConfigStructs(repoRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to discover allowed config structs: %w", err)
	}

	// filter discovered configs to only include allowed ones
	filteredConfigInfoMap := make(map[string]ConfigInfo)
	for key, configInfo := range configInfoMap {
		if allowedConfigs[key] {
			filteredConfigInfoMap[key] = configInfo
		}
	}
	fmt.Printf(" %d allowed (filtered %d)\n", len(filteredConfigInfoMap), len(configInfoMap)-len(filteredConfigInfoMap))

	// convert ConfigInfo to CatalogerConfigEntry format for packages.yaml
	discoveredConfigs := make(map[string]capabilities.CatalogerConfigEntry)
	for key, configInfo := range filteredConfigInfoMap {
		fields := make([]capabilities.CatalogerConfigFieldEntry, len(configInfo.Fields))
		for i, field := range configInfo.Fields {
			fields[i] = capabilities.CatalogerConfigFieldEntry{
				Key:         field.Name,
				Description: field.Description,
				AppKey:      field.AppKey,
			}
		}
		discoveredConfigs[key] = capabilities.CatalogerConfigEntry{
			Fields: fields,
		}
	}

	// 3b. Discover app-level configs
	fmt.Print(" → Discovering app-level config fields...")
	appConfigFields, err := DiscoverAppConfigs(repoRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to discover app configs: %w", err)
	}
	fmt.Printf(" found %d\n", len(appConfigFields))

	// convert to ApplicationConfigField format
	discoveredAppConfigs := make([]capabilities.ApplicationConfigField, len(appConfigFields))
	for i, field := range appConfigFields {
		discoveredAppConfigs[i] = capabilities.ApplicationConfigField{
			Key:          field.Key,
			Description:  field.Description,
			DefaultValue: field.DefaultValue,
		}
	}

	// 3c. Link catalogers to their configs
	fmt.Print(" → Linking catalogers to config structs...")
	catalogerConfigMappings, err := LinkCatalogersToConfigs(repoRoot)
	if err != nil {
		return nil, fmt.Errorf("failed to link catalogers to configs: %w", err)
	}
	fmt.Printf(" found %d mappings\n", len(catalogerConfigMappings))

	// 3c-1. Filter cataloger config mappings by exceptions:
	// remove any catalogers that should not have config fields
	fmt.Print(" → Filtering cataloger config mappings by exceptions...")
	filteredCatalogerConfigMappings := make(map[string]string)
	filteredCount := 0
	for catalogerName, configName := range catalogerConfigMappings {
		if catalogerConfigExceptions.Has(catalogerName) {
			filteredCount++
			continue
		}
		filteredCatalogerConfigMappings[catalogerName] = configName
	}
	if filteredCount > 0 {
		fmt.Printf(" filtered %d\n", filteredCount)
	} else {
		fmt.Println(" none")
	}

	// 3c-2. Merge manual config overrides:
	// manual overrides take precedence over discovered mappings
	fmt.Print(" → Merging manual config overrides...")
	overrideCount := 0
	for catalogerName, configName := range catalogerConfigOverrides {
		if catalogerConfigExceptions.Has(catalogerName) {
			// skip if this cataloger is in the exceptions list
			continue
		}
		filteredCatalogerConfigMappings[catalogerName] = configName
		overrideCount++
	}
	if overrideCount > 0 {
		fmt.Printf(" added %d\n", overrideCount)
	} else {
		fmt.Println(" none")
	}

	// 4. Build the updated catalogers list
	fmt.Println(" → Merging discovered catalogers with existing entries...")
	updated, orphans, mergeStats := mergeDiscoveredWithExisting(
		discovered,
		customCatalogerMetadata,
		customCatalogerPackageTypes,
		binaryClassifiers,
		allCatalogers,
		existing,
		discoveredConfigs,
		discoveredAppConfigs,
		filteredCatalogerConfigMappings,
	)
	stats.NewCatalogers = mergeStats.NewCatalogers
	stats.NewParserFunctions = mergeStats.NewParserFunctions
	stats.UpdatedCatalogers = mergeStats.UpdatedCatalogers
	stats.TotalCustomCatalogers = len(allCatalogers) - stats.TotalGenericCatalogers

	// 5. Check for orphaned parsers (parser functions that were renamed or deleted)
	if len(orphans) > 0 {
		return nil, fmt.Errorf("orphaned parsers detected (parser functions renamed or deleted):\n%s\n\nPlease manually remove these from %s or restore the parser functions in the code",
			formatOrphans(orphans), yamlPath)
	}

	// 6. Write back to YAML with comments, preserving the existing node tree
	fmt.Print(" → Writing updated packages.yaml...")
	if err := saveCapabilities(yamlPath, updated, existingNode); err != nil {
		return nil, fmt.Errorf("failed to save capabilities: %w", err)
	}
	fmt.Println(" done")

	return stats, nil
}
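
// A minimal usage sketch (the real call site lives in generate/main.go; the
// path and error handling here are illustrative, not taken from that file):
//
//	stats, err := RegenerateCapabilities("internal/capabilities/packages.yaml", repoRoot)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Printf("generic: %d, custom: %d, parsers: %d\n",
//		stats.TotalGenericCatalogers, stats.TotalCustomCatalogers, stats.TotalParserFunctions)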

type orphanInfo struct {
	catalogerName  string
	parserFunction string
}

type mergeStatistics struct {
	NewCatalogers      []string
	NewParserFunctions []string
	UpdatedCatalogers  []string
}

// CatalogerRegistry encapsulates cataloger lookup data and provides methods for querying cataloger information
type CatalogerRegistry struct {
	discovered map[string]DiscoveredCataloger
	all        []capabilities.CatalogerInfo
	infoByName map[string]*capabilities.CatalogerInfo
}

// NewCatalogerRegistry creates a new registry with the given discovered and all catalogers
func NewCatalogerRegistry(discovered map[string]DiscoveredCataloger, all []capabilities.CatalogerInfo) *CatalogerRegistry {
	infoByName := make(map[string]*capabilities.CatalogerInfo)
	for i := range all {
		infoByName[all[i].Name] = &all[i]
	}

	return &CatalogerRegistry{
		discovered: discovered,
		all:        all,
		infoByName: infoByName,
	}
}

// IsGeneric checks if a cataloger is a discovered generic cataloger and returns it if found
func (r *CatalogerRegistry) IsGeneric(name string) (DiscoveredCataloger, bool) {
	disc, ok := r.discovered[name]
	return disc, ok
}

// GetInfo returns the cataloger info for the given name, or nil if not found
func (r *CatalogerRegistry) GetInfo(name string) *capabilities.CatalogerInfo {
	return r.infoByName[name]
}

// DiscoveredCatalogers returns all discovered generic catalogers
func (r *CatalogerRegistry) DiscoveredCatalogers() map[string]DiscoveredCataloger {
	return r.discovered
}

// AllCatalogers returns all catalogers from the syft cataloger list
func (r *CatalogerRegistry) AllCatalogers() []capabilities.CatalogerInfo {
	return r.all
}

// EnrichmentData encapsulates metadata enrichment information (metadata types, package types, binary classifiers)
type EnrichmentData struct {
	metadata          map[string][]string
	packageTypes      map[string][]string
	binaryClassifiers []binary.Classifier
}

// NewEnrichmentData creates a new enrichment data container
func NewEnrichmentData(metadata, packageTypes map[string][]string, binaryClassifiers []binary.Classifier) *EnrichmentData {
	return &EnrichmentData{
		metadata:          metadata,
		packageTypes:      packageTypes,
		binaryClassifiers: binaryClassifiers,
	}
}

// GetMetadataTypes returns the metadata types for the given cataloger name
func (e *EnrichmentData) GetMetadataTypes(catalogerName string) ([]string, bool) {
	types, ok := e.metadata[catalogerName]
	return types, ok
}

// GetPackageTypes returns the package types for the given cataloger name
func (e *EnrichmentData) GetPackageTypes(catalogerName string) ([]string, bool) {
	types, ok := e.packageTypes[catalogerName]
	return types, ok
}

// EnrichEntry enriches a cataloger entry with metadata types and package types if available
func (e *EnrichmentData) EnrichEntry(catalogerName string, entry *capabilities.CatalogerEntry) {
	// update metadata types if available
	if types, ok := e.GetMetadataTypes(catalogerName); ok {
		entry.MetadataTypes = types
		// generate JSON schema types from metadata types
		entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(types)
	}
	// update package types if available
	if types, ok := e.GetPackageTypes(catalogerName); ok {
		entry.PackageTypes = types
	}
}

// convertToJSONSchemaTypesFromMetadata converts Go struct names to UpperCamelCase JSON schema names
func convertToJSONSchemaTypesFromMetadata(metadataTypes []string) []string {
	if len(metadataTypes) == 0 {
		return nil
	}

	result := make([]string, 0, len(metadataTypes))
	for _, typeName := range metadataTypes {
		jsonName := packagemetadata.JSONNameFromString(typeName)
		if jsonName != "" {
			camelCase := packagemetadata.ToUpperCamelCase(jsonName)
			result = append(result, camelCase)
		}
	}
	return result
}
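
// Note: metadata type names that packagemetadata cannot resolve yield an empty
// JSON name and are silently dropped, so the result may contain fewer entries
// than the input metadataTypes slice.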

// EnrichWithBinaryClassifier enriches an entry with binary classifier detectors if it's the binary-classifier-cataloger
func (e *EnrichmentData) EnrichWithBinaryClassifier(catalogerName string, entry *capabilities.CatalogerEntry) {
	// special handling for binary-classifier-cataloger: auto-generate one detector per classifier
	if catalogerName == "binary-classifier-cataloger" && len(e.binaryClassifiers) > 0 {
		var detectors []capabilities.Detector
		for _, classifier := range e.binaryClassifiers {
			// convert CPEs to strings
			cpeStrings := make([]string, len(classifier.CPEs))
			for i, c := range classifier.CPEs {
				cpeStrings[i] = c.Attributes.BindToFmtString()
			}

			// strip @version from PURL
			purlStr := stripPURLVersion(classifier.PURL.String())

			detectors = append(detectors, capabilities.Detector{
				Method:   "glob",
				Criteria: []string{classifier.FileGlob},
				Packages: []capabilities.DetectorPackageInfo{
					{
						Class: classifier.Class,
						Name:  classifier.Package,
						PURL:  purlStr,
						CPEs:  cpeStrings,
						Type:  "BinaryPkg",
					},
				},
			})
		}
		entry.Detectors = detectors
	}
}
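
// A sketch of the detector entry a single classifier produces, rendered as
// YAML (the keys assume lowercased struct tags and the glob/package values are
// hypothetical; actual serialization depends on the capabilities struct tags):
//
//	detectors:
//	  - method: glob
//	    criteria: ["**/python{,2,3}*"]
//	    packages:
//	      - class: python-binary
//	        name: python
//	        purl: pkg:generic/python
//	        cpes: ["cpe:2.3:a:python:python:*:*:*:*:*:*:*:*"]
//	        type: BinaryPkg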

// CatalogerMerger orchestrates the merging of discovered catalogers with existing capabilities
type CatalogerMerger struct {
	registry                *CatalogerRegistry
	enrichment              *EnrichmentData
	existing                *capabilities.Document
	catalogerConfigMappings map[string]string // catalogerName -> config struct name

	// internal merge state
	updated             *capabilities.Document
	processedCatalogers map[string]bool
	orphans             []orphanInfo
	stats               *mergeStatistics
}

// NewCatalogerMerger creates a new merger with the given registry, enrichment data, and existing document
func NewCatalogerMerger(registry *CatalogerRegistry, enrichment *EnrichmentData, existing *capabilities.Document, catalogerConfigMappings map[string]string) *CatalogerMerger {
	return &CatalogerMerger{
		registry:                registry,
		enrichment:              enrichment,
		existing:                existing,
		catalogerConfigMappings: catalogerConfigMappings,
		updated:                 &capabilities.Document{},
		processedCatalogers:     make(map[string]bool),
		stats:                   &mergeStatistics{},
	}
}

// Merge performs the merge operation and returns the updated document, orphans, and statistics.
// Note: Configs and ApplicationConfig must be set on the merger's updated document before or after calling Merge.
// They are AUTO-GENERATED sections that are completely replaced during regeneration.
func (m *CatalogerMerger) Merge() (*capabilities.Document, []orphanInfo, *mergeStatistics) {
	// process catalogers
	m.processExistingCatalogers()
	m.addNewGenericCatalogers()
	m.addNewCustomCatalogers()
	return m.updated, m.orphans, m.stats
}
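
// The resulting cataloger order is: existing entries in their original YAML
// order, then newly discovered generic catalogers, then newly discovered
// custom catalogers. The new-generic group follows Go map iteration and is
// therefore not deterministic between runs; the new-custom group follows the
// syft cataloger list order.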

// processExistingCatalogers processes all existing catalogers in their original order
func (m *CatalogerMerger) processExistingCatalogers() {
	for i := range m.existing.Catalogers {
		existingEntry := &m.existing.Catalogers[i]
		catalogerName := existingEntry.Name

		disc, isGeneric := m.registry.IsGeneric(catalogerName)
		info := m.registry.GetInfo(catalogerName)

		switch {
		case isGeneric:
			// existing generic cataloger - update auto-gen fields, preserve manual ones, check for orphans
			m.processGenericCataloger(existingEntry, disc, info)

		case info != nil:
			// existing custom cataloger - preserve but update ecosystem, selectors, metadata types, and package types
			m.processCustomCataloger(existingEntry, info)

		default:
			// cataloger no longer exists in syft - keep it as-is (the user may have added it manually)
			m.updated.Catalogers = append(m.updated.Catalogers, *existingEntry)
		}

		m.processedCatalogers[catalogerName] = true
	}
}

// addNewGenericCatalogers appends any new generic catalogers that weren't in the existing document
func (m *CatalogerMerger) addNewGenericCatalogers() {
	for catalogerName, disc := range m.registry.DiscoveredCatalogers() {
		if m.processedCatalogers[catalogerName] {
			continue
		}

		info := m.registry.GetInfo(catalogerName)
		// new generic cataloger - create it from the template
		entry := createTemplateEntry(disc, info)
		// update config field from discovered mappings
		if configName, hasConfig := m.catalogerConfigMappings[catalogerName]; hasConfig {
			entry.Config = configName
		}
		m.updated.Catalogers = append(m.updated.Catalogers, entry)
		m.stats.NewCatalogers = append(m.stats.NewCatalogers, catalogerName)
		for _, parser := range disc.Parsers {
			m.stats.NewParserFunctions = append(m.stats.NewParserFunctions, fmt.Sprintf("%s/%s", catalogerName, parser.ParserFunction))
		}

		m.processedCatalogers[catalogerName] = true
	}
}

// addNewCustomCatalogers appends any new custom catalogers from the syft cataloger list
func (m *CatalogerMerger) addNewCustomCatalogers() {
	for _, catalogerInfo := range m.registry.AllCatalogers() {
		catalogerName := catalogerInfo.Name
		if m.processedCatalogers[catalogerName] {
			continue
		}

		// new custom cataloger - create a template entry
		entry := capabilities.CatalogerEntry{
			Ecosystem: inferEcosystem(catalogerName),
			Name:      catalogerName,
			Type:      "custom",
			Source: capabilities.Source{
				File:     "", // must be filled manually
				Function: "", // must be filled manually
			},
			Selectors:    catalogerInfo.Selectors,
			Capabilities: capabilities.CapabilitySet{}, // empty array - must be filled manually
		}

		// update config field from discovered mappings
		if configName, hasConfig := m.catalogerConfigMappings[catalogerName]; hasConfig {
			entry.Config = configName
		}

		// enrich with metadata and package types
		m.enrichment.EnrichEntry(catalogerName, &entry)

		// fallback: if we have metadata_types but no json_schema_types, convert them;
		// this handles cases where metadata_types exist in YAML but no enrichment data
		if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 {
			entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes)
		}

		// enrich with binary classifier globs
		m.enrichment.EnrichWithBinaryClassifier(catalogerName, &entry)

		m.updated.Catalogers = append(m.updated.Catalogers, entry)
		m.stats.NewCatalogers = append(m.stats.NewCatalogers, catalogerName)

		m.processedCatalogers[catalogerName] = true
	}
}

// processGenericCataloger processes an existing generic cataloger entry
func (m *CatalogerMerger) processGenericCataloger(existingEntry *capabilities.CatalogerEntry, disc DiscoveredCataloger, info *capabilities.CatalogerInfo) {
	entry, catalogerOrphans, newParsers := updateEntry(existingEntry, disc, info, m.catalogerConfigMappings)

	// fallback for catalogers with a type override to custom but processed as generic:
	// these may have cataloger-level metadata_types that need json_schema_types
	if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 {
		entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes)
	}

	m.updated.Catalogers = append(m.updated.Catalogers, entry)
	m.orphans = append(m.orphans, catalogerOrphans...)
	if len(newParsers) > 0 || len(catalogerOrphans) > 0 {
		m.stats.UpdatedCatalogers = append(m.stats.UpdatedCatalogers, existingEntry.Name)
	}
	for _, parser := range newParsers {
		m.stats.NewParserFunctions = append(m.stats.NewParserFunctions, fmt.Sprintf("%s/%s", existingEntry.Name, parser))
	}
}

// processCustomCataloger processes an existing custom cataloger entry
func (m *CatalogerMerger) processCustomCataloger(existingEntry *capabilities.CatalogerEntry, info *capabilities.CatalogerInfo) {
	entry := *existingEntry
	entry.Ecosystem = inferEcosystem(existingEntry.Name)
	entry.Selectors = info.Selectors

	// update config field from discovered mappings (AUTO-GENERATED)
	if configName, hasConfig := m.catalogerConfigMappings[existingEntry.Name]; hasConfig {
		entry.Config = configName
	}

	// enrich with metadata and package types
	m.enrichment.EnrichEntry(existingEntry.Name, &entry)

	// fallback: if we have metadata_types but no json_schema_types, convert them;
	// this handles cases where metadata_types exist in YAML but no enrichment data
	if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 {
		entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes)
	}

	// enrich with binary classifier globs
	m.enrichment.EnrichWithBinaryClassifier(existingEntry.Name, &entry)

	m.updated.Catalogers = append(m.updated.Catalogers, entry)
}

// mergeDiscoveredWithExisting combines discovered cataloger information with existing capabilities,
// preserving manual sections (capabilities) while updating AUTO-GENERATED sections.
//
// The configs and appConfigs parameters are AUTO-GENERATED sections that completely replace
// any existing configs/app-config data in the packages.yaml file.
//
// The catalogerConfigMappings parameter maps cataloger names to their config struct names
// (e.g., "go-module-binary-cataloger" -> "golang.CatalogerConfig").
func mergeDiscoveredWithExisting(
	discovered map[string]DiscoveredCataloger,
	customMetadata map[string][]string,
	customPackageTypes map[string][]string,
	binaryClassifiers []binary.Classifier,
	allCatalogers []capabilities.CatalogerInfo,
	existing *capabilities.Document,
	configs map[string]capabilities.CatalogerConfigEntry,
	appConfigs []capabilities.ApplicationConfigField,
	catalogerConfigMappings map[string]string,
) (*capabilities.Document, []orphanInfo, *mergeStatistics) {
	registry := NewCatalogerRegistry(discovered, allCatalogers)
	enrichment := NewEnrichmentData(customMetadata, customPackageTypes, binaryClassifiers)
	merger := NewCatalogerMerger(registry, enrichment, existing, catalogerConfigMappings)

	// set the AUTO-GENERATED config sections;
	// these completely replace any existing data (not merged)
	merger.updated.Configs = configs
	merger.updated.ApplicationConfig = appConfigs

	return merger.Merge()
}

func updateEntry(existing *capabilities.CatalogerEntry, discovered DiscoveredCataloger, info *capabilities.CatalogerInfo, catalogerConfigMappings map[string]string) (capabilities.CatalogerEntry, []orphanInfo, []string) {
	updated := *existing

	// update AUTO-GENERATED fields
	updated.Name = discovered.Name

	// check if there's a type override for this cataloger
	if overrideType, hasOverride := catalogerTypeOverrides[discovered.Name]; hasOverride {
		updated.Type = overrideType
	} else {
		updated.Type = discovered.Type
	}

	updated.Source = capabilities.Source{
		File:     discovered.SourceFile,
		Function: discovered.SourceFunction,
	}

	// update selectors from cataloger info
	if info != nil {
		updated.Selectors = info.Selectors
	}

	// update config field from discovered mappings (AUTO-GENERATED)
	if configName, hasConfig := catalogerConfigMappings[discovered.Name]; hasConfig {
		updated.Config = configName
	} else {
		// clear config if no mapping exists (it may have been removed)
		updated.Config = ""
	}

	// always re-infer ecosystem (it's MANUAL so users can override if needed)
	updated.Ecosystem = inferEcosystem(discovered.Name)

	var orphans []orphanInfo
	var newParsers []string

	// update parsers only if the final type is generic (not overridden to custom);
	// if a cataloger is overridden from generic to custom, we don't update parsers
	if discovered.Type == genericCatalogerType && updated.Type == genericCatalogerType {
		updatedParsers, parserOrphans, newParserFuncs := updateParsers(existing.Parsers, discovered.Parsers, discovered.Name)
		updated.Parsers = updatedParsers
		orphans = append(orphans, parserOrphans...)
		newParsers = newParserFuncs
	}

	return updated, orphans, newParsers
}

func updateParsers(existingParsers []capabilities.Parser, discoveredParsers []DiscoveredParser, catalogerName string) ([]capabilities.Parser, []orphanInfo, []string) {
	var updated []capabilities.Parser
	var orphans []orphanInfo
	var newParserFuncs []string

	// create a lookup for existing parsers by parser function
	existingByParserFunc := make(map[string]*capabilities.Parser)
	for i := range existingParsers {
		existingByParserFunc[existingParsers[i].ParserFunction] = &existingParsers[i]
	}

	// process all discovered parsers
	for _, discParser := range discoveredParsers {
		existingParser := existingByParserFunc[discParser.ParserFunction]

		if existingParser == nil {
			// new parser - create with empty capabilities
			updated = append(updated, createTemplateParser(discParser))
			newParserFuncs = append(newParserFuncs, discParser.ParserFunction)
		} else {
			// update auto-gen fields, preserve capabilities
			p := *existingParser
			p.Detector.Method = discParser.Method
			p.Detector.Criteria = discParser.Criteria

			// only update metadata/package types if the discovered parser has them;
			// this preserves existing YAML values when no test observations exist
			if len(discParser.MetadataTypes) > 0 {
				p.MetadataTypes = discParser.MetadataTypes
				p.JSONSchemaTypes = discParser.JSONSchemaTypes
			} else if len(p.MetadataTypes) > 0 && len(p.JSONSchemaTypes) == 0 {
				// fallback: if the parser has metadata_types but no json_schema_types, convert them
				p.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(p.MetadataTypes)
			}

			if len(discParser.PackageTypes) > 0 {
				p.PackageTypes = discParser.PackageTypes
			}

			// p.Capabilities is preserved from existing
			updated = append(updated, p)
		}

		// mark this parser as processed
		delete(existingByParserFunc, discParser.ParserFunction)
	}

	// any remaining existing parsers are orphans (their parser function was renamed or deleted)
	for parserFunc := range existingByParserFunc {
		orphans = append(orphans, orphanInfo{
			catalogerName:  catalogerName,
			parserFunction: parserFunc,
		})
	}

	return updated, orphans, newParserFuncs
}

func createTemplateEntry(disc DiscoveredCataloger, info *capabilities.CatalogerInfo) capabilities.CatalogerEntry {
	// determine the type, checking for overrides first
	catalogerType := disc.Type
	if overrideType, hasOverride := catalogerTypeOverrides[disc.Name]; hasOverride {
		catalogerType = overrideType
	}

	entry := capabilities.CatalogerEntry{
		Ecosystem: inferEcosystem(disc.Name),
		Name:      disc.Name,
		Type:      catalogerType,
		Source: capabilities.Source{
			File:     disc.SourceFile,
			Function: disc.SourceFunction,
		},
	}

	// add selectors from cataloger info
	if info != nil {
		entry.Selectors = info.Selectors
	}

	// use the determined catalogerType (which may be overridden) to structure the entry
	switch catalogerType {
	case genericCatalogerType:
		for _, discParser := range disc.Parsers {
			entry.Parsers = append(entry.Parsers, createTemplateParser(discParser))
		}
	case "custom":
		// custom cataloger with empty capabilities (must be filled manually)
		entry.Capabilities = capabilities.CapabilitySet{}
	}

	return entry
}

func createTemplateParser(disc DiscoveredParser) capabilities.Parser {
	return capabilities.Parser{
		ParserFunction: disc.ParserFunction,
		Detector: capabilities.Detector{
			Method:   disc.Method,
			Criteria: disc.Criteria,
		},
		MetadataTypes:   disc.MetadataTypes,
		PackageTypes:    disc.PackageTypes,
		JSONSchemaTypes: disc.JSONSchemaTypes,
		Capabilities:    capabilities.CapabilitySet{}, // empty array - must be filled manually
	}
}

func formatOrphans(orphans []orphanInfo) string {
	var lines []string
	for _, o := range orphans {
		lines = append(lines, fmt.Sprintf(" - cataloger: %s, parser function: %s", o.catalogerName, o.parserFunction))
	}
	return strings.Join(lines, "\n")
}

// ecosystemMapping maps patterns in cataloger names to ecosystem names.
// order matters - more specific patterns should come first.
type ecosystemMapping struct {
	patterns  []string // patterns to match in the cataloger name
	ecosystem string   // ecosystem to return if any pattern matches
}

// ecosystemMappings defines the pattern-to-ecosystem mappings.
// note: order matters - check more specific patterns first
var ecosystemMappings = []ecosystemMapping{
	// language-based ecosystems
	{[]string{"rust", "cargo"}, "rust"},
	{[]string{"javascript", "node", "npm"}, "javascript"},
	{[]string{"python"}, "python"},
	{[]string{"java", "graalvm"}, "java"},
	{[]string{"go-module", "golang"}, "go"},
	{[]string{"ruby", "gem"}, "ruby"},
	{[]string{"php", "composer", "pear", "pecl"}, "php"},
	{[]string{"dotnet", ".net", "csharp"}, "dotnet"},
	{[]string{"swift", "cocoapods"}, "swift"},
	{[]string{"dart", "pubspec"}, "dart"},
	{[]string{"elixir", "mix"}, "elixir"},
	{[]string{"erlang", "rebar"}, "erlang"},
	{[]string{"haskell", "cabal", "stack"}, "haskell"},
	{[]string{"lua"}, "lua"},
	{[]string{"ocaml", "opam"}, "ocaml"},
	{[]string{"r-package"}, "r"},
	{[]string{"swipl", "prolog"}, "prolog"},
	{[]string{"cpp", "conan"}, "c++"},
	{[]string{"kotlin"}, "kotlin"},

	// os/distro-based ecosystems
	{[]string{"apk", "alpine"}, "alpine"},
	{[]string{"dpkg", "deb", "debian"}, "debian"},
	{[]string{"rpm", "redhat"}, "rpm"},
	{[]string{"alpm", "arch"}, "arch"},
	{[]string{"portage", "gentoo"}, "gentoo"},
	{[]string{"homebrew"}, "homebrew"},
	{[]string{"snap"}, "snap"},

	// other ecosystems
	{[]string{"binary", "elf", "pe-binary"}, "binary"},
	{[]string{"conda"}, "conda"},
	{[]string{"nix"}, "nix"},
	{[]string{"kernel"}, "linux"},
	{[]string{"bitnami"}, "bitnami"},
	{[]string{"terraform"}, "terraform"},
	{[]string{"github"}, "github-actions"},
	{[]string{"wordpress"}, "wordpress"},
	{[]string{"sbom"}, "sbom"},
}

// inferEcosystem attempts to determine the ecosystem from a cataloger name
func inferEcosystem(catalogerName string) string {
	name := strings.ToLower(catalogerName)

	for _, mapping := range ecosystemMappings {
		for _, pattern := range mapping.patterns {
			if strings.Contains(name, pattern) {
				return mapping.ecosystem
			}
		}
	}

	// default
	return "other"
}
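
// Illustrative results (hypothetical cataloger names, matched by substring
// against the lowercased name, first mapping wins):
//
//	inferEcosystem("rust-cargo-lock-cataloger") // "rust"
//	inferEcosystem("conan-cataloger")           // "c++" (via the "conan" pattern)
//	inferEcosystem("mystery-cataloger")         // "other" (no pattern matches)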