From 63832e5e5a7609b324af8263d75c28b85df0b1dd Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 24 Oct 2025 09:21:07 -0400 Subject: [PATCH] expose json schema types Signed-off-by: Alex Goodman --- cmd/syft/internal/commands/cataloger_info.go | 46 +++--- .../generate/completeness_test.go | 94 +++++++++++ internal/capabilities/generate/discover.go | 11 +- internal/capabilities/generate/io.go | 7 +- internal/capabilities/generate/merge.go | 63 +++++++- .../generate/metadata_discovery.go | 31 +++- internal/capabilities/model.go | 34 ++-- internal/capabilities/packages.yaml | 152 +++++++++++++++++- internal/packagemetadata/names.go | 27 ++++ 9 files changed, 409 insertions(+), 56 deletions(-) diff --git a/cmd/syft/internal/commands/cataloger_info.go b/cmd/syft/internal/commands/cataloger_info.go index 1fd643aa9..7d1cfebb0 100644 --- a/cmd/syft/internal/commands/cataloger_info.go +++ b/cmd/syft/internal/commands/cataloger_info.go @@ -124,14 +124,14 @@ func renderCatalogerInfoJSON(doc *capabilities.Document, catalogers []capabiliti } type patternInfo struct { - Method string `json:"method"` - Criteria []string `json:"criteria"` - Conditions []capabilities.DetectorCondition `json:"conditions,omitempty"` - Packages []detectorPackageInfo `json:"packages,omitempty"` - Comment string `json:"comment,omitempty"` - MetadataTypes []string `json:"metadata_types,omitempty"` - PackageTypes []string `json:"package_types,omitempty"` - Capabilities capabilities.CapabilitySet `json:"capabilities,omitempty"` + Method string `json:"method"` + Criteria []string `json:"criteria"` + Conditions []capabilities.DetectorCondition `json:"conditions,omitempty"` + Packages []detectorPackageInfo `json:"packages,omitempty"` + Comment string `json:"comment,omitempty"` + PackageTypes []string `json:"package_types,omitempty"` + JSONSchemaTypes []string `json:"json_schema_types,omitempty"` + Capabilities capabilities.CapabilitySet `json:"capabilities,omitempty"` } type catalogerInfo struct { @@ -170,14 +170,14 @@ func renderCatalogerInfoJSON(doc *capabilities.Document, catalogers []capabiliti } pi := patternInfo{ - Method: string(parser.Detector.Method), - Criteria: parser.Detector.Criteria, - Conditions: parser.Detector.Conditions, - Packages: pkgs, - Comment: parser.Detector.Comment, - MetadataTypes: parser.MetadataTypes, - PackageTypes: parser.PackageTypes, - Capabilities: parser.Capabilities, + Method: string(parser.Detector.Method), + Criteria: parser.Detector.Criteria, + Conditions: parser.Detector.Conditions, + Packages: pkgs, + Comment: parser.Detector.Comment, + PackageTypes: parser.PackageTypes, + JSONSchemaTypes: parser.JSONSchemaTypes, + Capabilities: parser.Capabilities, } info.Patterns = append(info.Patterns, pi) @@ -200,13 +200,13 @@ func renderCatalogerInfoJSON(doc *capabilities.Document, catalogers []capabiliti } pi := patternInfo{ - Method: string(det.Method), - Criteria: det.Criteria, - Conditions: det.Conditions, - Packages: pkgs, - Comment: det.Comment, - MetadataTypes: cat.MetadataTypes, - PackageTypes: cat.PackageTypes, + Method: string(det.Method), + Criteria: det.Criteria, + Conditions: det.Conditions, + Packages: pkgs, + Comment: det.Comment, + PackageTypes: cat.PackageTypes, + JSONSchemaTypes: cat.JSONSchemaTypes, } info.Patterns = append(info.Patterns, pi) } diff --git a/internal/capabilities/generate/completeness_test.go b/internal/capabilities/generate/completeness_test.go index 69d6445e0..c1998629a 100644 --- a/internal/capabilities/generate/completeness_test.go +++ b/internal/capabilities/generate/completeness_test.go @@ -869,6 +869,100 @@ func validateCapabilityValueType(fieldPath string, value interface{}) error { return nil } +// TestMetadataTypesHaveJSONSchemaTypes validates that metadata_types and json_schema_types are synchronized +// in packages.yaml - every metadata type should have a corresponding json_schema_type with correct conversion +func TestMetadataTypesHaveJSONSchemaTypes(t *testing.T) { + repoRoot, err := RepoRoot() + require.NoError(t, err) + + // load the packages.yaml + doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml")) + require.NoError(t, err) + + // collect all validation errors + var errors []string + + // validate cataloger-level types (custom catalogers) + for _, cataloger := range doc.Catalogers { + if cataloger.Type == "custom" { + if len(cataloger.MetadataTypes) > 0 { + // verify counts match + if len(cataloger.MetadataTypes) != len(cataloger.JSONSchemaTypes) { + errors = append(errors, + fmt.Sprintf("Cataloger %q has %d metadata_types but %d json_schema_types (counts must match)", + cataloger.Name, len(cataloger.MetadataTypes), len(cataloger.JSONSchemaTypes))) + continue + } + + // verify each metadata_type converts to its corresponding json_schema_type + for i, metadataType := range cataloger.MetadataTypes { + expectedJSONSchemaType := convertMetadataTypeToJSONSchemaType(metadataType) + if expectedJSONSchemaType == "" { + errors = append(errors, + fmt.Sprintf("Cataloger %q metadata_type[%d] %q could not be converted to json_schema_type (not found in packagemetadata registry)", + cataloger.Name, i, metadataType)) + continue + } + + actualJSONSchemaType := cataloger.JSONSchemaTypes[i] + if expectedJSONSchemaType != actualJSONSchemaType { + errors = append(errors, + fmt.Sprintf("Cataloger %q metadata_type[%d] %q should convert to json_schema_type %q but found %q", + cataloger.Name, i, metadataType, expectedJSONSchemaType, actualJSONSchemaType)) + } + } + } + } + + // validate parser-level types (generic catalogers) + if cataloger.Type == "generic" { + for _, parser := range cataloger.Parsers { + if len(parser.MetadataTypes) > 0 { + // verify counts match + if len(parser.MetadataTypes) != len(parser.JSONSchemaTypes) { + errors = append(errors, + fmt.Sprintf("Parser %q/%s has %d metadata_types but %d json_schema_types (counts must match)", + cataloger.Name, parser.ParserFunction, len(parser.MetadataTypes), len(parser.JSONSchemaTypes))) + continue + } + + // verify each metadata_type converts to its corresponding json_schema_type + for i, metadataType := range parser.MetadataTypes { + expectedJSONSchemaType := convertMetadataTypeToJSONSchemaType(metadataType) + if expectedJSONSchemaType == "" { + errors = append(errors, + fmt.Sprintf("Parser %q/%s metadata_type[%d] %q could not be converted to json_schema_type (not found in packagemetadata registry)", + cataloger.Name, parser.ParserFunction, i, metadataType)) + continue + } + + actualJSONSchemaType := parser.JSONSchemaTypes[i] + if expectedJSONSchemaType != actualJSONSchemaType { + errors = append(errors, + fmt.Sprintf("Parser %q/%s metadata_type[%d] %q should convert to json_schema_type %q but found %q", + cataloger.Name, parser.ParserFunction, i, metadataType, expectedJSONSchemaType, actualJSONSchemaType)) + } + } + } + } + } + } + + // report all errors at once + if len(errors) > 0 { + require.Fail(t, "Metadata types and JSON schema types validation failed", strings.Join(errors, "\n")) + } +} + +// convertMetadataTypeToJSONSchemaType converts a metadata type (e.g., "pkg.AlpmDBEntry") to its JSON schema type (e.g., "AlpmDbEntry") +func convertMetadataTypeToJSONSchemaType(metadataType string) string { + jsonName := packagemetadata.JSONNameFromString(metadataType) + if jsonName == "" { + return "" + } + return packagemetadata.ToUpperCamelCase(jsonName) +} + // loadConfigStructFields loads the config struct definition from source code using AST parsing func loadConfigStructFields(repoRoot, configName string) (map[string]string, error) { // configName format: "package.StructName" (e.g., "golang.CatalogerConfig") diff --git a/internal/capabilities/generate/discover.go b/internal/capabilities/generate/discover.go index e3d766429..8e05c202e 100644 --- a/internal/capabilities/generate/discover.go +++ b/internal/capabilities/generate/discover.go @@ -26,11 +26,12 @@ type DiscoveredCataloger struct { // DiscoveredParser represents a parser function and its detection criteria discovered from source type DiscoveredParser struct { - ParserFunction string - Method capabilities.ArtifactDetectionMethod - Criteria []string - MetadataTypes []string // populated from cataloger-type-uses.json files - PackageTypes []string // populated from cataloger-type-uses.json files + ParserFunction string + Method capabilities.ArtifactDetectionMethod + Criteria []string + MetadataTypes []string // populated from cataloger-type-uses.json files + PackageTypes []string // populated from cataloger-type-uses.json files + JSONSchemaTypes []string // populated from MetadataTypes } // discoverGenericCatalogers finds all uses of generic.NewCataloger() in the codebase diff --git a/internal/capabilities/generate/io.go b/internal/capabilities/generate/io.go index 454c4aa41..c4cb7fa87 100644 --- a/internal/capabilities/generate/io.go +++ b/internal/capabilities/generate/io.go @@ -577,6 +577,11 @@ func addCatalogerFieldComment(keyNode, valueNode *yaml.Node, catalogerName strin if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment } + case "json_schema_types": + // json_schema_types are AUTO-GENERATED + if keyNode.LineComment == "" { + keyNode.LineComment = autoGeneratedComment + } case "capabilities": // capabilities are MANUAL if keyNode.LineComment == "" { @@ -625,7 +630,7 @@ func addParserComments(parsersNode *yaml.Node) { valueNode := parserNode.Content[i+1] switch keyNode.Value { - case "parser_function", "metadata_types", "package_types": + case "parser_function", "metadata_types", "package_types", "json_schema_types": // add AUTO-GENERATED comment to these fields if keyNode.LineComment == "" { keyNode.LineComment = autoGeneratedComment diff --git a/internal/capabilities/generate/merge.go b/internal/capabilities/generate/merge.go index 09142ed26..c84c0f268 100644 --- a/internal/capabilities/generate/merge.go +++ b/internal/capabilities/generate/merge.go @@ -7,6 +7,7 @@ import ( "github.com/scylladb/go-set/strset" "github.com/anchore/syft/internal/capabilities" + "github.com/anchore/syft/internal/packagemetadata" "github.com/anchore/syft/syft/pkg/cataloger/binary" ) @@ -329,6 +330,8 @@ func (e *EnrichmentData) EnrichEntry(catalogerName string, entry *capabilities.C // update metadata types if available if types, ok := e.GetMetadataTypes(catalogerName); ok { entry.MetadataTypes = types + // generate JSON schema types from metadata types + entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(types) } // update package types if available if types, ok := e.GetPackageTypes(catalogerName); ok { @@ -336,6 +339,23 @@ func (e *EnrichmentData) EnrichEntry(catalogerName string, entry *capabilities.C } } +// convertToJSONSchemaTypesFromMetadata converts Go struct names to UpperCamelCase JSON schema names +func convertToJSONSchemaTypesFromMetadata(metadataTypes []string) []string { + if len(metadataTypes) == 0 { + return nil + } + + result := make([]string, 0, len(metadataTypes)) + for _, typeName := range metadataTypes { + jsonName := packagemetadata.JSONNameFromString(typeName) + if jsonName != "" { + camelCase := packagemetadata.ToUpperCamelCase(jsonName) + result = append(result, camelCase) + } + } + return result +} + // EnrichWithBinaryClassifier enriches an entry with binary classifier detectors if it's the binary-classifier-cataloger func (e *EnrichmentData) EnrichWithBinaryClassifier(catalogerName string, entry *capabilities.CatalogerEntry) { // special handling for binary-classifier-cataloger: auto-generate one detector per classifier @@ -487,6 +507,12 @@ func (m *CatalogerMerger) addNewCustomCatalogers() { // enrich with metadata and package types m.enrichment.EnrichEntry(catalogerName, &entry) + // fallback: if we have metadata_types but no json_schema_types, convert them + // this handles cases where metadata_types exist in YAML but no enrichment data + if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 { + entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes) + } + // enrich with binary classifier globs m.enrichment.EnrichWithBinaryClassifier(catalogerName, &entry) @@ -500,6 +526,13 @@ func (m *CatalogerMerger) addNewCustomCatalogers() { // processGenericCataloger processes an existing generic cataloger entry func (m *CatalogerMerger) processGenericCataloger(existingEntry *capabilities.CatalogerEntry, disc DiscoveredCataloger, info *capabilities.CatalogerInfo) { entry, catalogerOrphans, newParsers := updateEntry(existingEntry, disc, info, m.catalogerConfigMappings) + + // fallback for catalogers with type override to custom but processed as generic + // these may have cataloger-level metadata_types that need json_schema_types + if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 { + entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes) + } + m.updated.Catalogers = append(m.updated.Catalogers, entry) m.orphans = append(m.orphans, catalogerOrphans...) if len(newParsers) > 0 || len(catalogerOrphans) > 0 { @@ -524,6 +557,12 @@ func (m *CatalogerMerger) processCustomCataloger(existingEntry *capabilities.Cat // enrich with metadata and package types m.enrichment.EnrichEntry(existingEntry.Name, &entry) + // fallback: if we have metadata_types but no json_schema_types, convert them + // this handles cases where metadata_types exist in YAML but no enrichment data + if len(entry.MetadataTypes) > 0 && len(entry.JSONSchemaTypes) == 0 { + entry.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(entry.MetadataTypes) + } + // enrich with binary classifier globs m.enrichment.EnrichWithBinaryClassifier(existingEntry.Name, &entry) @@ -640,8 +679,21 @@ func updateParsers(existingParsers []capabilities.Parser, discoveredParsers []Di p := *existingParser p.Detector.Method = discParser.Method p.Detector.Criteria = discParser.Criteria - p.MetadataTypes = discParser.MetadataTypes - p.PackageTypes = discParser.PackageTypes + + // only update metadata/package types if discovered parser has them + // this preserves existing YAML values when no test observations exist + if len(discParser.MetadataTypes) > 0 { + p.MetadataTypes = discParser.MetadataTypes + p.JSONSchemaTypes = discParser.JSONSchemaTypes + } else if len(p.MetadataTypes) > 0 && len(p.JSONSchemaTypes) == 0 { + // fallback: if parser has metadata_types but no json_schema_types, convert them + p.JSONSchemaTypes = convertToJSONSchemaTypesFromMetadata(p.MetadataTypes) + } + + if len(discParser.PackageTypes) > 0 { + p.PackageTypes = discParser.PackageTypes + } + // p.Capabilities is preserved from existing updated = append(updated, p) } @@ -704,9 +756,10 @@ func createTemplateParser(disc DiscoveredParser) capabilities.Parser { Method: disc.Method, Criteria: disc.Criteria, }, - MetadataTypes: disc.MetadataTypes, - PackageTypes: disc.PackageTypes, - Capabilities: capabilities.CapabilitySet{}, // empty array - must be filled manually + MetadataTypes: disc.MetadataTypes, + PackageTypes: disc.PackageTypes, + JSONSchemaTypes: disc.JSONSchemaTypes, + Capabilities: capabilities.CapabilitySet{}, // empty array - must be filled manually } } diff --git a/internal/capabilities/generate/metadata_discovery.go b/internal/capabilities/generate/metadata_discovery.go index a58138bf1..894501e69 100644 --- a/internal/capabilities/generate/metadata_discovery.go +++ b/internal/capabilities/generate/metadata_discovery.go @@ -8,6 +8,7 @@ import ( "sort" "github.com/anchore/syft/internal/capabilities/pkgtestobservation" + "github.com/anchore/syft/internal/packagemetadata" ) // TestObservationIndex indexes all test observations for efficient lookup and application. @@ -22,8 +23,9 @@ type TestObservationIndex struct { // TypeObservation combines metadata types and package types type TypeObservation struct { - MetadataTypes []string - PackageTypes []string + MetadataTypes []string + PackageTypes []string + JSONSchemaTypes []string } // newTestObservationIndex creates a new empty index @@ -166,6 +168,23 @@ func mergeAndDeduplicateStrings(existing, additional []string) []string { return result } +// convertToJSONSchemaTypes converts Go struct names to UpperCamelCase JSON schema names +func convertToJSONSchemaTypes(metadataTypes []string) []string { + if len(metadataTypes) == 0 { + return nil + } + + result := make([]string, 0, len(metadataTypes)) + for _, typeName := range metadataTypes { + jsonName := packagemetadata.JSONNameFromString(typeName) + if jsonName != "" { + camelCase := packagemetadata.ToUpperCamelCase(jsonName) + result = append(result, camelCase) + } + } + return result +} + // mergeTestObservations merges metadata and package type data from a test-observations.json file // into the observation index func mergeTestObservations(observations *pkgtestobservation.Test, index *TestObservationIndex) { @@ -185,6 +204,8 @@ func mergeTestObservations(observations *pkgtestobservation.Test, index *TestObs // merge the types existing.MetadataTypes = mergeAndDeduplicateStrings(existing.MetadataTypes, parserObs.MetadataTypes) existing.PackageTypes = mergeAndDeduplicateStrings(existing.PackageTypes, parserObs.PackageTypes) + // generate JSON schema types from metadata types + existing.JSONSchemaTypes = convertToJSONSchemaTypes(existing.MetadataTypes) index.setParserObservations(pkg, parserName, existing) } @@ -203,6 +224,8 @@ func mergeTestObservations(observations *pkgtestobservation.Test, index *TestObs // merge the types existing.MetadataTypes = mergeAndDeduplicateStrings(existing.MetadataTypes, catalogerObs.MetadataTypes) existing.PackageTypes = mergeAndDeduplicateStrings(existing.PackageTypes, catalogerObs.PackageTypes) + // generate JSON schema types from metadata types + existing.JSONSchemaTypes = convertToJSONSchemaTypes(existing.MetadataTypes) index.setCatalogerObservations(catalogerName, existing) } @@ -217,6 +240,7 @@ func applyParserObservations(cataloger *DiscoveredCataloger, index *TestObservat if obs := index.getParserObservations(cataloger.PackageName, parser.ParserFunction); obs != nil { if len(obs.MetadataTypes) > 0 { cataloger.Parsers[i].MetadataTypes = obs.MetadataTypes + cataloger.Parsers[i].JSONSchemaTypes = obs.JSONSchemaTypes foundData = true } if len(obs.PackageTypes) > 0 { @@ -239,6 +263,7 @@ func applySingleParserCatalogerObservations(cataloger *DiscoveredCataloger, cata cataloger.Parsers[0].MetadataTypes, catalogerObs.MetadataTypes, ) + cataloger.Parsers[0].JSONSchemaTypes = convertToJSONSchemaTypes(cataloger.Parsers[0].MetadataTypes) foundData = true } @@ -274,6 +299,7 @@ func applyMultiParserCatalogerObservations(cataloger *DiscoveredCataloger, catal for i := range cataloger.Parsers { if len(catalogerObs.MetadataTypes) > 0 { cataloger.Parsers[i].MetadataTypes = catalogerObs.MetadataTypes + cataloger.Parsers[i].JSONSchemaTypes = catalogerObs.JSONSchemaTypes foundData = true } if len(catalogerObs.PackageTypes) > 0 { @@ -286,6 +312,7 @@ func applyMultiParserCatalogerObservations(cataloger *DiscoveredCataloger, catal for i, parser := range cataloger.Parsers { if len(parser.MetadataTypes) == 0 && len(catalogerObs.MetadataTypes) > 0 { cataloger.Parsers[i].MetadataTypes = catalogerObs.MetadataTypes + cataloger.Parsers[i].JSONSchemaTypes = catalogerObs.JSONSchemaTypes foundData = true } if len(parser.PackageTypes) == 0 && len(catalogerObs.PackageTypes) > 0 { diff --git a/internal/capabilities/model.go b/internal/capabilities/model.go index 0082b504d..c969897b8 100644 --- a/internal/capabilities/model.go +++ b/internal/capabilities/model.go @@ -72,26 +72,28 @@ type DetectorCondition struct { // CatalogerEntry represents a single cataloger's capabilities type CatalogerEntry struct { - Ecosystem string `yaml:"ecosystem" json:"ecosystem"` // MANUAL - ecosystem categorization (e.g., "python", "java", "javascript") - Name string `yaml:"name" json:"name"` // AUTO-GENERATED for generic, MANUAL for custom - Type string `yaml:"type" json:"type"` // AUTO-GENERATED: "generic" or "custom" - Source Source `yaml:"source" json:"source"` // AUTO-GENERATED for generic, MANUAL for custom - Config string `yaml:"config,omitempty" json:"config,omitempty"` // e.g., "golang.CatalogerConfig" - Selectors []string `yaml:"selectors,omitempty" json:"selectors,omitempty"` // AUTO-GENERATED - cataloger name tags for selection - Parsers []Parser `yaml:"parsers,omitempty" json:"parsers,omitempty"` // AUTO-GENERATED structure, only for type=generic - Detectors []Detector `yaml:"detectors,omitempty" json:"detectors,omitempty"` // AUTO-GENERATED - detection methods (only for type=custom) - MetadataTypes []string `yaml:"metadata_types,omitempty" json:"metadata_types,omitempty"` // AUTO-GENERATED - pkg metadata types emitted (only for type=custom) - PackageTypes []string `yaml:"package_types,omitempty" json:"package_types,omitempty"` // AUTO-GENERATED - package types emitted (only for type=custom) - Capabilities CapabilitySet `yaml:"capabilities,omitempty" json:"capabilities,omitempty"` // MANUAL - config-driven capability definitions (only for type=custom) + Ecosystem string `yaml:"ecosystem" json:"ecosystem"` // MANUAL - ecosystem categorization (e.g., "python", "java", "javascript") + Name string `yaml:"name" json:"name"` // AUTO-GENERATED for generic, MANUAL for custom + Type string `yaml:"type" json:"type"` // AUTO-GENERATED: "generic" or "custom" + Source Source `yaml:"source" json:"source"` // AUTO-GENERATED for generic, MANUAL for custom + Config string `yaml:"config,omitempty" json:"config,omitempty"` // e.g., "golang.CatalogerConfig" + Selectors []string `yaml:"selectors,omitempty" json:"selectors,omitempty"` // AUTO-GENERATED - cataloger name tags for selection + Parsers []Parser `yaml:"parsers,omitempty" json:"parsers,omitempty"` // AUTO-GENERATED structure, only for type=generic + Detectors []Detector `yaml:"detectors,omitempty" json:"detectors,omitempty"` // AUTO-GENERATED - detection methods (only for type=custom) + MetadataTypes []string `yaml:"metadata_types,omitempty" json:"metadata_types,omitempty"` // AUTO-GENERATED - pkg metadata types emitted (only for type=custom) + PackageTypes []string `yaml:"package_types,omitempty" json:"package_types,omitempty"` // AUTO-GENERATED - package types emitted (only for type=custom) + JSONSchemaTypes []string `yaml:"json_schema_types,omitempty" json:"json_schema_types,omitempty"` // AUTO-GENERATED - JSON schema type names (UpperCamelCase) + Capabilities CapabilitySet `yaml:"capabilities,omitempty" json:"capabilities,omitempty"` // MANUAL - config-driven capability definitions (only for type=custom) } // Parser represents a parser function and its artifact detection criteria for generic catalogers type Parser struct { - ParserFunction string `yaml:"function" json:"function"` // AUTO-GENERATED (used as preservation key) - Detector Detector `yaml:"detector" json:"detector"` // AUTO-GENERATED - how artifacts are detected - MetadataTypes []string `yaml:"metadata_types,omitempty" json:"metadata_types,omitempty"` // AUTO-GENERATED - pkg metadata types emitted by this parser - PackageTypes []string `yaml:"package_types,omitempty" json:"package_types,omitempty"` // AUTO-GENERATED - package types emitted by this parser - Capabilities CapabilitySet `yaml:"capabilities,omitempty" json:"capabilities,omitempty"` // MANUAL - config-driven capability definitions + ParserFunction string `yaml:"function" json:"function"` // AUTO-GENERATED (used as preservation key) + Detector Detector `yaml:"detector" json:"detector"` // AUTO-GENERATED - how artifacts are detected + MetadataTypes []string `yaml:"metadata_types,omitempty" json:"metadata_types,omitempty"` // AUTO-GENERATED - pkg metadata types emitted by this parser + PackageTypes []string `yaml:"package_types,omitempty" json:"package_types,omitempty"` // AUTO-GENERATED - package types emitted by this parser + JSONSchemaTypes []string `yaml:"json_schema_types,omitempty" json:"json_schema_types,omitempty"` // AUTO-GENERATED - JSON schema type names (UpperCamelCase) + Capabilities CapabilitySet `yaml:"capabilities,omitempty" json:"capabilities,omitempty"` // MANUAL - config-driven capability definitions } // CapabilityField represents a single capability field with optional conditional values based on configuration. diff --git a/internal/capabilities/packages.yaml b/internal/capabilities/packages.yaml index 14393d8fa..6d5b367e2 100644 --- a/internal/capabilities/packages.yaml +++ b/internal/capabilities/packages.yaml @@ -313,7 +313,7 @@ configs: # AUTO-GENERATED - config structs and their fields app_key: python.guess-unpinned-requirements catalogers: # alpm (arch / pacman) ################################################################################################# - - ecosystem: alpm # MANUAL + - ecosystem: arch # MANUAL name: alpm-db-cataloger # AUTO-GENERATED type: generic # AUTO-GENERATED source: # AUTO-GENERATED @@ -339,6 +339,8 @@ catalogers: - pkg.AlpmDBEntry package_types: # AUTO-GENERATED - alpm + json_schema_types: # AUTO-GENERATED + - AlpmDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -387,6 +389,8 @@ catalogers: - pkg.ApkDBEntry package_types: # AUTO-GENERATED - apk + json_schema_types: # AUTO-GENERATED + - ApkDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -1003,6 +1007,8 @@ catalogers: - pkg.BinarySignature package_types: # AUTO-GENERATED - binary + json_schema_types: # AUTO-GENERATED + - BinarySignature capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1047,6 +1053,8 @@ catalogers: package_types: # AUTO-GENERATED - binary - rpm + json_schema_types: # AUTO-GENERATED + - ElfBinaryPackageNoteJsonPayload capabilities: # MANUAL - config-driven capability definitions # licenses can be detected in some elf packages (via the licenses note field) - name: license @@ -1092,6 +1100,8 @@ catalogers: - pkg.PEBinary package_types: # AUTO-GENERATED - binary + json_schema_types: # AUTO-GENERATED + - PeBinary capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1130,6 +1140,8 @@ catalogers: - pkg.BitnamiSBOMEntry package_types: # AUTO-GENERATED - bitnami + json_schema_types: # AUTO-GENERATED + - BitnamiSbomEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1181,6 +1193,8 @@ catalogers: - pkg.RustBinaryAuditEntry package_types: # AUTO-GENERATED - rust-crate + json_schema_types: # AUTO-GENERATED + - RustCargoAuditEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1222,6 +1236,8 @@ catalogers: - pkg.RustCargoLockEntry package_types: # AUTO-GENERATED - rust-crate + json_schema_types: # AUTO-GENERATED + - RustCargoLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1269,6 +1285,8 @@ catalogers: - pkg.CocoaPodfileLockEntry package_types: # AUTO-GENERATED - pod + json_schema_types: # AUTO-GENERATED + - CocoaPodfileLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1314,6 +1332,8 @@ catalogers: - pkg.SwiftPackageManagerResolvedEntry package_types: # AUTO-GENERATED - swift + json_schema_types: # AUTO-GENERATED + - SwiftPackageManagerLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1357,6 +1377,9 @@ catalogers: - pkg.ConanV2LockEntry package_types: # AUTO-GENERATED - conan + json_schema_types: # AUTO-GENERATED + - CConanLockEntry + - CConanLockV2Entry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1386,6 +1409,8 @@ catalogers: - pkg.ConanfileEntry package_types: # AUTO-GENERATED - conan + json_schema_types: # AUTO-GENERATED + - CConanFileEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1427,6 +1452,8 @@ catalogers: - pkg.ConaninfoEntry package_types: # AUTO-GENERATED - conan + json_schema_types: # AUTO-GENERATED + - CConanInfoEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1466,6 +1493,8 @@ catalogers: - pkg.CondaMetaPackage package_types: # AUTO-GENERATED - conda + json_schema_types: # AUTO-GENERATED + - CondaMetadataEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -1512,6 +1541,8 @@ catalogers: - pkg.DartPubspec package_types: # AUTO-GENERATED - dart-pub + json_schema_types: # AUTO-GENERATED + - DartPubspec capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1551,6 +1582,8 @@ catalogers: - pkg.DartPubspecLockEntry package_types: # AUTO-GENERATED - dart-pub + json_schema_types: # AUTO-GENERATED + - DartPubspecLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1571,7 +1604,7 @@ catalogers: - name: package_manager.package_integrity_hash default: false # Dpkg (debian) ################################################################################################### - - ecosystem: dpkg # MANUAL + - ecosystem: debian # MANUAL name: dpkg-db-cataloger # AUTO-GENERATED type: generic # AUTO-GENERATED source: # AUTO-GENERATED @@ -1599,6 +1632,8 @@ catalogers: - pkg.DpkgDBEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - DpkgDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -1621,7 +1656,7 @@ catalogers: - DpkgDBEntry.Files[].Digest - name: package_manager.package_integrity_hash default: false - - ecosystem: dpkg # MANUAL + - ecosystem: debian # MANUAL name: deb-archive-cataloger # AUTO-GENERATED type: generic # AUTO-GENERATED source: # AUTO-GENERATED @@ -1645,6 +1680,8 @@ catalogers: - pkg.DpkgArchiveEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - DpkgArchiveEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -1692,6 +1729,9 @@ catalogers: package_types: # AUTO-GENERATED - dotnet - npm + json_schema_types: # AUTO-GENERATED + - DotnetDepsEntry + - DotnetPortableExecutableEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1727,6 +1767,8 @@ catalogers: - pkg.DotnetDepsEntry package_types: # AUTO-GENERATED - dotnet + json_schema_types: # AUTO-GENERATED + - DotnetDepsEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1769,6 +1811,8 @@ catalogers: - pkg.DotnetPackagesLockEntry package_types: # AUTO-GENERATED - dotnet + json_schema_types: # AUTO-GENERATED + - DotnetPackagesLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1810,6 +1854,8 @@ catalogers: - pkg.DotnetPortableExecutableEntry package_types: # AUTO-GENERATED - dotnet + json_schema_types: # AUTO-GENERATED + - DotnetPortableExecutableEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1848,6 +1894,8 @@ catalogers: - pkg.ElixirMixLockEntry package_types: # AUTO-GENERATED - hex + json_schema_types: # AUTO-GENERATED + - ElixirMixLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1933,6 +1981,8 @@ catalogers: - pkg.ErlangRebarLockEntry package_types: # AUTO-GENERATED - hex + json_schema_types: # AUTO-GENERATED + - ErlangRebarLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -1978,6 +2028,8 @@ catalogers: - pkg.GitHubActionsUseStatement package_types: # AUTO-GENERATED - github-action-workflow + json_schema_types: # AUTO-GENERATED + - GithubActionsUseStatement capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2017,6 +2069,8 @@ catalogers: - pkg.GitHubActionsUseStatement package_types: # AUTO-GENERATED - github-action + json_schema_types: # AUTO-GENERATED + - GithubActionsUseStatement capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2043,6 +2097,8 @@ catalogers: - pkg.GitHubActionsUseStatement package_types: # AUTO-GENERATED - github-action + json_schema_types: # AUTO-GENERATED + - GithubActionsUseStatement capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2092,6 +2148,8 @@ catalogers: - pkg.GolangBinaryBuildinfoEntry package_types: # AUTO-GENERATED - go-module + json_schema_types: # AUTO-GENERATED + - GoModuleBuildinfoEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2146,6 +2204,9 @@ catalogers: - pkg.GolangSourceEntry package_types: # AUTO-GENERATED - go-module + json_schema_types: # AUTO-GENERATED + - GoModuleEntry + - GoSourceEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2241,6 +2302,8 @@ catalogers: - pkg.JavaArchive package_types: # AUTO-GENERATED - java-archive + json_schema_types: # AUTO-GENERATED + - JavaArchive capabilities: # MANUAL - config-driven capability definitions # TODO: online capabilities - name: license @@ -2288,6 +2351,8 @@ catalogers: - pkg.JavaArchive package_types: # AUTO-GENERATED - java-archive + json_schema_types: # AUTO-GENERATED + - JavaArchive capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2329,6 +2394,8 @@ catalogers: - pkg.JavaArchive package_types: # AUTO-GENERATED - java-archive + json_schema_types: # AUTO-GENERATED + - JavaArchive capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2373,6 +2440,8 @@ catalogers: - pkg.JavaVMInstallation package_types: # AUTO-GENERATED - binary + json_schema_types: # AUTO-GENERATED + - JavaJvmInstallation capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2488,6 +2557,8 @@ catalogers: - pkg.HackageStackYamlLockEntry package_types: # AUTO-GENERATED - hackage + json_schema_types: # AUTO-GENERATED + - HaskellHackageStackLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2518,6 +2589,8 @@ catalogers: - pkg.HackageStackYamlEntry package_types: # AUTO-GENERATED - hackage + json_schema_types: # AUTO-GENERATED + - HaskellHackageStackEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2562,6 +2635,8 @@ catalogers: - pkg.HomebrewFormula package_types: # AUTO-GENERATED - homebrew + json_schema_types: # AUTO-GENERATED + - HomebrewFormula capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2629,6 +2704,8 @@ catalogers: - pkg.YarnLockEntry package_types: # AUTO-GENERATED - npm + json_schema_types: # AUTO-GENERATED + - JavascriptYarnLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2660,6 +2737,8 @@ catalogers: - pkg.NpmPackageLockEntry package_types: # AUTO-GENERATED - npm + json_schema_types: # AUTO-GENERATED + - JavascriptNpmPackageLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2704,6 +2783,8 @@ catalogers: - pkg.NpmPackage package_types: # AUTO-GENERATED - npm + json_schema_types: # AUTO-GENERATED + - JavascriptNpmPackage capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2754,6 +2835,9 @@ catalogers: package_types: # AUTO-GENERATED - linux-kernel - linux-kernel-module + json_schema_types: # AUTO-GENERATED + - LinuxKernelArchive + - LinuxKernelModule capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2793,6 +2877,8 @@ catalogers: - pkg.LuaRocksPackage package_types: # AUTO-GENERATED - lua-rocks + json_schema_types: # AUTO-GENERATED + - LuarocksPackage capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2834,6 +2920,8 @@ catalogers: - pkg.NixStoreEntry package_types: # AUTO-GENERATED - nix + json_schema_types: # AUTO-GENERATED + - NixStoreEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2873,6 +2961,8 @@ catalogers: - pkg.NixStoreEntry package_types: # AUTO-GENERATED - nix + json_schema_types: # AUTO-GENERATED + - NixStoreEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -2923,6 +3013,8 @@ catalogers: - pkg.OpamPackage package_types: # AUTO-GENERATED - opam + json_schema_types: # AUTO-GENERATED + - OpamPackage capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -2964,6 +3056,8 @@ catalogers: - pkg.PhpComposerInstalledEntry package_types: # AUTO-GENERATED - php-composer + json_schema_types: # AUTO-GENERATED + - PhpComposerInstalledEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3006,6 +3100,8 @@ catalogers: - pkg.PhpComposerLockEntry package_types: # AUTO-GENERATED - php-composer + json_schema_types: # AUTO-GENERATED + - PhpComposerLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3049,6 +3145,8 @@ catalogers: - pkg.BinarySignature package_types: # AUTO-GENERATED - binary + json_schema_types: # AUTO-GENERATED + - BinarySignature capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3090,6 +3188,8 @@ catalogers: - pkg.PhpPearEntry package_types: # AUTO-GENERATED - php-pear + json_schema_types: # AUTO-GENERATED + - PhpPearEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3126,6 +3226,8 @@ catalogers: - pkg.PhpPeclEntry package_types: # AUTO-GENERATED - php-pecl + json_schema_types: # AUTO-GENERATED + - PhpPeclEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3144,7 +3246,7 @@ catalogers: - name: package_manager.package_integrity_hash default: false # Portage (gentoo) ######################################################################################################## - - ecosystem: portage # MANUAL + - ecosystem: gentoo # MANUAL name: portage-cataloger # AUTO-GENERATED type: generic # AUTO-GENERATED source: # AUTO-GENERATED @@ -3169,6 +3271,8 @@ catalogers: - pkg.PortageEntry package_types: # AUTO-GENERATED - portage + json_schema_types: # AUTO-GENERATED + - PortageDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3218,6 +3322,8 @@ catalogers: - pkg.PythonPackage package_types: # AUTO-GENERATED - python + json_schema_types: # AUTO-GENERATED + - PythonPackage capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3262,6 +3368,8 @@ catalogers: - pkg.PythonUvLockEntry package_types: # AUTO-GENERATED - python + json_schema_types: # AUTO-GENERATED + - PythonUvLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3313,6 +3421,8 @@ catalogers: - pkg.PythonPipfileLockEntry package_types: # AUTO-GENERATED - python + json_schema_types: # AUTO-GENERATED + - PythonPipfileLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3342,6 +3452,8 @@ catalogers: - pkg.PythonPoetryLockEntry package_types: # AUTO-GENERATED - python + json_schema_types: # AUTO-GENERATED + - PythonPoetryLockEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3370,6 +3482,8 @@ catalogers: - pkg.PythonRequirementsEntry package_types: # AUTO-GENERATED - python + json_schema_types: # AUTO-GENERATED + - PythonPipRequirementsEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3409,6 +3523,8 @@ catalogers: - pkg.RDescription package_types: # AUTO-GENERATED - R-package + json_schema_types: # AUTO-GENERATED + - RDescription capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3449,6 +3565,8 @@ catalogers: - pkg.RpmArchive package_types: # AUTO-GENERATED - rpm + json_schema_types: # AUTO-GENERATED + - RpmArchive capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3495,6 +3613,8 @@ catalogers: - pkg.RpmDBEntry package_types: # AUTO-GENERATED - rpm + json_schema_types: # AUTO-GENERATED + - RpmDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3522,6 +3642,8 @@ catalogers: - pkg.RpmDBEntry package_types: # AUTO-GENERATED - rpm + json_schema_types: # AUTO-GENERATED + - RpmDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3609,6 +3731,8 @@ catalogers: - pkg.RubyGemspec package_types: # AUTO-GENERATED - gem + json_schema_types: # AUTO-GENERATED + - RubyGemspec capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3652,6 +3776,8 @@ catalogers: - pkg.RubyGemspec package_types: # AUTO-GENERATED - gem + json_schema_types: # AUTO-GENERATED + - RubyGemspec capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3702,6 +3828,8 @@ catalogers: - pkg.ApkDBEntry package_types: # AUTO-GENERATED - apk + json_schema_types: # AUTO-GENERATED + - ApkDbEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true @@ -3740,6 +3868,8 @@ catalogers: - pkg.SnapEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - SnapEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3764,6 +3894,8 @@ catalogers: - pkg.SnapEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - SnapEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3788,6 +3920,8 @@ catalogers: - pkg.SnapEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - SnapEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3812,6 +3946,8 @@ catalogers: - pkg.SnapEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - SnapEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3836,6 +3972,8 @@ catalogers: - pkg.SnapEntry package_types: # AUTO-GENERATED - deb + json_schema_types: # AUTO-GENERATED + - SnapEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3875,6 +4013,8 @@ catalogers: - pkg.SwiplPackEntry package_types: # AUTO-GENERATED - swiplpack + json_schema_types: # AUTO-GENERATED + - SwiplpackPackage capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3915,6 +4055,8 @@ catalogers: - pkg.TerraformLockProviderEntry package_types: # AUTO-GENERATED - terraform + json_schema_types: # AUTO-GENERATED + - TerraformLockProviderEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: false @@ -3954,6 +4096,8 @@ catalogers: - pkg.WordpressPluginEntry package_types: # AUTO-GENERATED - wordpress-plugin + json_schema_types: # AUTO-GENERATED + - WordpressPluginEntry capabilities: # MANUAL - config-driven capability definitions - name: license default: true diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index 79eed0d4f..f4581853d 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -163,3 +163,30 @@ func ReflectTypeFromJSONName(name string) reflect.Type { name = strings.ToLower(name) return jsonTypes.nameToType[name] } + +// JSONNameFromString converts a Go struct name string (e.g., "pkg.AlpmDBEntry" or "AlpmDBEntry") +// to its JSON schema name (e.g., "alpm-db-entry"). Returns empty string if not found. +func JSONNameFromString(typeName string) string { + // strip "pkg." prefix if present + typeName = strings.TrimPrefix(typeName, "pkg.") + + // look through all types to find matching struct name + for typ, jsonName := range jsonTypes.typeToName { + if typ.Name() == typeName { + return jsonName + } + } + return "" +} + +// ToUpperCamelCase converts kebab-case to UpperCamelCase +// e.g., "alpm-db-entry" -> "AlpmDbEntry" +func ToUpperCamelCase(kebab string) string { + parts := strings.Split(kebab, "-") + for i, part := range parts { + if len(part) > 0 { + parts[i] = strings.ToUpper(part[0:1]) + part[1:] + } + } + return strings.Join(parts, "") +}