// syft/internal/capabilities/generate/completeness_test.go

// this file verifies the claims made in packages.yaml against test observations and source code, ensuring cataloger capabilities are accurate and complete.
package main

import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"os/exec"
"path/filepath"
"reflect"
"sort"
"strings"
"testing"

"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/require"

"github.com/anchore/syft/internal/capabilities"
"github.com/anchore/syft/internal/packagemetadata"
"github.com/anchore/syft/syft/pkg"
)
// requireParserObservations controls whether TestAllCatalogersHaveObservations enforces that all parsers have observations
// - true: fail test if any parser is missing observations (strict mode)
// - false: only check custom catalogers, skip parser checks (lenient mode, not all parsers are observable)
const requireParserObservations = false
// metadataTypeCoverageExceptions lists metadata types that are allowed to not be represented in any cataloger
var metadataTypeCoverageExceptions = strset.New(
reflect.TypeOf(pkg.MicrosoftKbPatch{}).Name(),
)
// packageTypeCoverageExceptions lists package types that are allowed to not be represented in any cataloger
var packageTypeCoverageExceptions = strset.New(
string(pkg.JenkinsPluginPkg), // TODO: this should probably be covered by a cataloger test one day
string(pkg.KbPkg),
)
// observationExceptions maps cataloger/parser names to observation types that should be ignored during validation
//
// TestAllCatalogersHaveObservations:
// - always checks custom catalogers
// - checks parsers only if requireParserObservations=true
// - nil or non-nil value: skip existence check for this cataloger/parser
//
// examples:
//
// "graalvm-native-image-cataloger": nil, // custom cataloger: skip existence check
// "linux-kernel-cataloger": strset.New("relationships"), // custom cataloger: skip only relationships validation
// "conan-cataloger/parseConanLock": nil, // parser: skip all observation validation
// "cataloger-name/parser-function": strset.New("file_digests"), // parser: skip only file_digests validation
var observationExceptions = map[string]*strset.Set{
// for the graalvm-native-image-cataloger, we don't yet have a reliable test fixture
"graalvm-native-image-cataloger": nil,
// the linux-kernel-cataloger produces relationships, but they aren't really indicative of dependency information in the way a user might expect
"linux-kernel-cataloger": strset.New("relationships"),
}
func TestCatalogersInSync(t *testing.T) {
// get canonical list from syft binary
catalogersInBinary := getCatalogerNamesFromBinary(t)
// load catalogers from embedded YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
yamlCatalogers := strset.New()
for _, c := range catalogerEntries {
yamlCatalogers.Add(c.Name)
}
// test 1: All catalogers in binary must be in YAML
var missingFromYAML []string
for _, name := range catalogersInBinary {
if !yamlCatalogers.Has(name) {
missingFromYAML = append(missingFromYAML, name)
}
}
require.Empty(t, missingFromYAML,
"The following catalogers are in 'syft cataloger list' but missing from capabilities YAML: %v\n"+
"Run 'go generate ./internal/capabilities' to auto-add generic catalogers, or manually add custom catalogers.",
missingFromYAML)
// test 2: All catalogers in YAML must exist in binary
var orphanedInYAML []string
binarySet := strset.New()
for _, name := range catalogersInBinary {
binarySet.Add(name)
}
for _, name := range yamlCatalogers.List() {
if !binarySet.Has(name) {
orphanedInYAML = append(orphanedInYAML, name)
}
}
require.Empty(t, orphanedInYAML,
"The following catalogers are in capabilities YAML but not found in binary: %v\n"+
"These catalogers may have been removed. Delete them from the YAML.",
orphanedInYAML)
// test 3: All capabilities must be filled (no TODOs/nulls)
validateCapabilitiesFilled(t, catalogerEntries)
}
func getCatalogerNamesFromBinary(t *testing.T) []string {
// get cataloger names from task factories
infos, err := allPackageCatalogerInfo()
require.NoError(t, err)
var names []string
for _, info := range infos {
names = append(names, info.Name)
}
sort.Strings(names)
return names
}
func validateCapabilitiesFilled(t *testing.T, catalogers []capabilities.CatalogerEntry) {
for _, cataloger := range catalogers {
cataloger := cataloger // capture loop variable for subtest
t.Run(cataloger.Name, func(t *testing.T) {
if cataloger.Type == "generic" {
// generic catalogers have parsers with capabilities
require.NotEmpty(t, cataloger.Parsers, "generic cataloger must have at least one parser")
for _, parser := range cataloger.Parsers {
parser := parser // capture loop variable for subtest
t.Run(parser.ParserFunction, func(t *testing.T) {
require.NotEmpty(t, parser.Capabilities, "parser must have at least one capability field defined")
})
}
} else if cataloger.Type == "custom" {
// custom catalogers have cataloger-level capabilities
require.NotEmpty(t, cataloger.Capabilities, "custom cataloger must have at least one capability field defined")
}
})
}
}
func TestPackageTypeCoverage(t *testing.T) {
// load catalogers from embedded YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
// collect all package types mentioned in catalogers
foundPkgTypes := strset.New()
for _, cataloger := range catalogerEntries {
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
for _, pkgType := range parser.PackageTypes {
foundPkgTypes.Add(pkgType)
}
}
} else if cataloger.Type == "custom" {
for _, pkgType := range cataloger.PackageTypes {
foundPkgTypes.Add(pkgType)
}
}
}
// check that all known package types are represented
var missingTypes []pkg.Type
for _, pkgType := range pkg.AllPkgs {
if !foundPkgTypes.Has(string(pkgType)) {
missingTypes = append(missingTypes, pkgType)
}
}
// filter out exceptions
var missingTypesWithoutExceptions []pkg.Type
for _, pkgType := range missingTypes {
if !packageTypeCoverageExceptions.Has(string(pkgType)) {
missingTypesWithoutExceptions = append(missingTypesWithoutExceptions, pkgType)
}
}
require.Empty(t, missingTypesWithoutExceptions,
"The following package types are not represented in any cataloger: %v\n"+
"Either add catalogers for these types or update pkg.AllPkgs if they're no longer supported.",
missingTypesWithoutExceptions)
}
func TestMetadataTypeCoverage(t *testing.T) {
// load catalogers from embedded YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
// collect all metadata types mentioned in catalogers
foundMetadataTypes := strset.New()
for _, cataloger := range catalogerEntries {
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
for _, metadataType := range parser.MetadataTypes {
foundMetadataTypes.Add(strings.TrimPrefix(metadataType, "pkg."))
}
}
} else if cataloger.Type == "custom" {
for _, metadataType := range cataloger.MetadataTypes {
foundMetadataTypes.Add(strings.TrimPrefix(metadataType, "pkg."))
}
}
}
// get all known metadata types
allMetadataTypes := packagemetadata.AllTypes()
// check that all known metadata types are represented
var missingTypes []string
for _, metadataType := range allMetadataTypes {
typeName := reflect.TypeOf(metadataType).Name()
if !foundMetadataTypes.Has(typeName) {
missingTypes = append(missingTypes, typeName)
}
}
// filter out exceptions
var missingTypesWithoutExceptions []string
for _, metadataType := range missingTypes {
if !metadataTypeCoverageExceptions.Has(metadataType) {
missingTypesWithoutExceptions = append(missingTypesWithoutExceptions, metadataType)
}
}
require.Empty(t, missingTypesWithoutExceptions,
"The following metadata types are not represented in any cataloger: %v\n"+
"Either add catalogers for these types or update packagemetadata.AllTypes() if they're no longer supported.",
missingTypesWithoutExceptions)
}
func TestCatalogerStructure(t *testing.T) {
// load catalogers from embedded YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
for _, cataloger := range catalogerEntries {
cataloger := cataloger // capture loop variable for subtest
t.Run(cataloger.Name, func(t *testing.T) {
// ecosystem must always be set (it's MANUAL)
require.NotEmpty(t, cataloger.Ecosystem, "ecosystem must be set for all catalogers")
if cataloger.Type == "generic" {
// generic catalogers must have parsers
require.NotEmpty(t, cataloger.Parsers, "generic cataloger must have at least one parser")
// generic catalogers should not have cataloger-level capabilities
require.Empty(t, cataloger.Capabilities, "generic cataloger should not have cataloger-level capabilities (use parser-level instead)")
// generic catalogers should not have cataloger-level metadata/package types
require.Empty(t, cataloger.MetadataTypes, "generic cataloger should not have cataloger-level metadata types")
require.Empty(t, cataloger.PackageTypes, "generic cataloger should not have cataloger-level package types")
} else if cataloger.Type == "custom" {
// custom catalogers must have detectors
require.NotEmpty(t, cataloger.Detectors, "custom cataloger must have at least one detector")
// custom catalogers must have cataloger-level capabilities
require.NotEmpty(t, cataloger.Capabilities, "custom cataloger must have cataloger-level capabilities")
// custom catalogers should not have parsers
require.Empty(t, cataloger.Parsers, "custom cataloger should not have parsers (those are for generic catalogers)")
} else {
t.Errorf("unknown cataloger type: %q (must be 'generic' or 'custom')", cataloger.Type)
}
})
}
}
func TestCatalogerDataQuality(t *testing.T) {
// load catalogers from embedded YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
t.Run("no duplicate cataloger names", func(t *testing.T) {
nameCount := make(map[string]int)
for _, cataloger := range catalogerEntries {
nameCount[cataloger.Name]++
}
var duplicates []string
for name, count := range nameCount {
if count > 1 {
duplicates = append(duplicates, fmt.Sprintf("%s (appears %d times)", name, count))
}
}
require.Empty(t, duplicates, "Found duplicate cataloger names: %v", duplicates)
})
t.Run("detector validation for custom catalogers", func(t *testing.T) {
for _, cataloger := range catalogerEntries {
if cataloger.Type != "custom" {
continue
}
cataloger := cataloger // capture loop variable
t.Run(cataloger.Name, func(t *testing.T) {
require.NotEmpty(t, cataloger.Detectors, "custom cataloger must have at least one detector")
for i, detector := range cataloger.Detectors {
t.Run(fmt.Sprintf("detector-%d", i), func(t *testing.T) {
// detector criteria must not be empty
require.NotEmpty(t, detector.Criteria, "detector criteria must not be empty")
// detector method must be valid
validMethods := map[capabilities.ArtifactDetectionMethod]bool{
capabilities.GlobDetection: true,
capabilities.PathDetection: true,
capabilities.MIMETypeDetection: true,
}
require.True(t, validMethods[detector.Method],
"detector method must be one of: glob, path, mimetype (got %q)", detector.Method)
})
}
})
}
})
t.Run("no duplicate parser functions within cataloger", func(t *testing.T) {
for _, cataloger := range catalogerEntries {
if cataloger.Type != "generic" {
continue
}
cataloger := cataloger // capture loop variable
t.Run(cataloger.Name, func(t *testing.T) {
parserFuncs := strset.New()
var duplicates []string
for _, parser := range cataloger.Parsers {
if parserFuncs.Has(parser.ParserFunction) {
duplicates = append(duplicates, parser.ParserFunction)
}
parserFuncs.Add(parser.ParserFunction)
}
require.Empty(t, duplicates, "Found duplicate parser functions: %v", duplicates)
})
}
})
}
// TestCapabilitiesAreUpToDate verifies that regeneration runs successfully and leaves packages.yaml unchanged
func TestCapabilitiesAreUpToDate(t *testing.T) {
if os.Getenv("CI") == "" {
t.Skip("skipping regeneration test in local environment")
}
repoRoot, err := RepoRoot()
require.NoError(t, err)
yamlPath := filepath.Join(repoRoot, "internal/capabilities/packages.yaml")
// regenerate should not fail
_, err = RegenerateCapabilities(yamlPath, repoRoot)
require.NoError(t, err)
// verify file hasn't changed (i.e., it was already up to date)
cmd := exec.Command("git", "diff", "--exit-code", yamlPath)
cmd.Dir = repoRoot
err = cmd.Run()
require.NoError(t, err, "packages.yaml has uncommitted changes after regeneration. Run 'go generate ./internal/capabilities' locally and commit the changes.")
}
// TestCatalogersHaveTestObservations verifies that all catalogers have test observations,
// ensuring their tests use the pkgtest helpers
func TestCatalogersHaveTestObservations(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load catalogers from YAML
catalogerEntries, err := capabilities.Packages()
require.NoError(t, err)
// collect all catalogers and parsers from observations
observedCatalogers := strset.New()
observedParsers := make(map[string]*strset.Set) // package -> parser set
// walk test-fixtures directories to find test-observations.json files
testFixtureDirs, err := findTestFixtureDirs(repoRoot)
require.NoError(t, err)
for _, dir := range testFixtureDirs {
observationsFile := filepath.Join(dir, "test-observations.json")
if _, err := os.Stat(observationsFile); os.IsNotExist(err) {
continue
}
observations, err := readTestObservations(observationsFile)
if err != nil {
t.Logf("Warning: failed to read %s: %v", observationsFile, err)
continue
}
// track observed catalogers
for catalogerName := range observations.Catalogers {
observedCatalogers.Add(catalogerName)
}
// track observed parsers (use a name that doesn't shadow the imported pkg package)
pkgName := observations.Package
if observedParsers[pkgName] == nil {
observedParsers[pkgName] = strset.New()
}
for parserName := range observations.Parsers {
observedParsers[pkgName].Add(parserName)
}
}
// infer parser observations for single-parser catalogers
// if a cataloger has only one parser and the cataloger was observed, assume the parser was evaluated
for _, cataloger := range catalogerEntries {
if cataloger.Type == "generic" && len(cataloger.Parsers) == 1 && observedCatalogers.Has(cataloger.Name) {
packageName := extractPackageName(cataloger.Name)
if observedParsers[packageName] == nil {
observedParsers[packageName] = strset.New()
}
observedParsers[packageName].Add(cataloger.Parsers[0].ParserFunction)
}
}
// verify catalogers have observations
var missingCatalogers []string
var missingParsers []string
for _, cataloger := range catalogerEntries {
if cataloger.Type == "custom" {
// custom catalogers should always have cataloger-level observations
// skip if this cataloger has an exception (nil or non-nil)
if _, hasException := observationExceptions[cataloger.Name]; hasException {
continue
}
if !observedCatalogers.Has(cataloger.Name) {
missingCatalogers = append(missingCatalogers, cataloger.Name)
}
} else if cataloger.Type == "generic" && requireParserObservations {
// generic catalogers have parser-level observations (only checked if requireParserObservations=true)
// skip if the cataloger itself has an exception (applies to all its parsers)
if _, hasException := observationExceptions[cataloger.Name]; hasException {
continue
}
// extract package name from cataloger name
packageName := extractPackageName(cataloger.Name)
for _, parser := range cataloger.Parsers {
parserKey := fmt.Sprintf("%s/%s", cataloger.Name, parser.ParserFunction)
// skip if this specific parser has an exception (nil or non-nil)
if _, hasException := observationExceptions[parserKey]; hasException {
continue
}
if observedParsers[packageName] == nil || !observedParsers[packageName].Has(parser.ParserFunction) {
missingParsers = append(missingParsers, parserKey)
}
}
}
}
require.Empty(t, missingCatalogers,
"The following custom catalogers have no test observations (not using pkgtest helpers): %v\n"+
"Update tests to use CatalogTester.TestCataloger() from syft/pkg/cataloger/internal/pkgtest",
missingCatalogers)
if requireParserObservations {
require.Empty(t, missingParsers,
"The following parsers have no test observations (not using pkgtest helpers): %v\n"+
"Update tests to use CatalogTester.TestParser() from syft/pkg/cataloger/internal/pkgtest",
missingParsers)
}
}
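
// for orientation, a test-observations.json file has roughly this shape (inferred from
// the struct fields read above; the exact JSON key names and example values are assumptions):
//
//	{
//	  "package": "javascript",
//	  "catalogers": {"javascript-lock-cataloger": {...}},
//	  "parsers": {"parsePackageLock": {...}}
//	}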
// extractPackageName extracts the package name from a cataloger name
// e.g., "javascript-lock-cataloger" -> "javascript"
func extractPackageName(catalogerName string) string {
// package name is the first segment before the first dash
for i, ch := range catalogerName {
if ch == '-' {
return catalogerName[:i]
}
}
return catalogerName
}
func TestConfigCompleteness(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors before failing
var errors []string
// validation 1: all entries in configs section are referenced by at least one cataloger
configsReferenced := make(map[string]bool)
for _, cataloger := range doc.Catalogers {
if cataloger.Config != "" {
configsReferenced[cataloger.Config] = true
}
}
for configKey := range doc.Configs {
if !configsReferenced[configKey] {
errors = append(errors, fmt.Sprintf("Config %q is not referenced by any cataloger", configKey))
}
}
// validation 2: all catalogers with non-empty config field have entry in configs
for _, cataloger := range doc.Catalogers {
if cataloger.Config != "" {
if _, exists := doc.Configs[cataloger.Config]; !exists {
errors = append(errors, fmt.Sprintf("Cataloger %q references config %q which doesn't exist in configs section", cataloger.Name, cataloger.Config))
}
}
}
// validation 3: all app-key references in configs exist in app-config section
// build a set of all app-config keys for quick lookup
appConfigKeys := make(map[string]bool)
for _, appConfig := range doc.ApplicationConfig {
appConfigKeys[appConfig.Key] = true
}
for configName, configEntry := range doc.Configs {
for _, field := range configEntry.Fields {
if field.AppKey != "" {
if !appConfigKeys[field.AppKey] {
errors = append(errors, fmt.Sprintf("Config field %q.%s references app-key %q which doesn't exist in app-config section", configName, field.Key, field.AppKey))
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Config completeness validation failed", strings.Join(errors, "\n"))
}
}
func TestAppConfigFieldsHaveDescriptions(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
configs, err := DiscoverAppConfigs(repoRoot)
require.NoError(t, err)
// verify that all configs have descriptions
var missingDescriptions []string
for _, cfg := range configs {
if cfg.Description == "" {
missingDescriptions = append(missingDescriptions, cfg.Key)
}
}
require.Empty(t, missingDescriptions, "the following configs are missing descriptions: %v", missingDescriptions)
}
func TestAppConfigKeyFormat(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
configs, err := DiscoverAppConfigs(repoRoot)
require.NoError(t, err)
// verify that all config keys follow the expected format
for _, cfg := range configs {
// keys should be in the format "ecosystem.field-name" or "ecosystem.nested.field-name"
require.Contains(t, cfg.Key, ".", "config key should contain at least one dot: %s", cfg.Key)
// keys should use kebab-case (all lowercase with hyphens)
require.NotContains(t, cfg.Key, "_", "config key should not contain underscores: %s", cfg.Key)
require.NotContains(t, cfg.Key, " ", "config key should not contain spaces: %s", cfg.Key)
}
}
// TestCapabilityConfigFieldReferences validates that config field names referenced in CapabilitiesV2
// conditions actually exist in the cataloger's config struct
func TestCapabilityConfigFieldReferences(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors before failing
var errors []string
// for each cataloger with CapabilitiesV2
for _, cataloger := range doc.Catalogers {
// check cataloger-level CapabilitiesV2 (for custom catalogers)
if cataloger.Type == "custom" && len(cataloger.Capabilities) > 0 {
// load the cataloger's config struct if it has one
if cataloger.Config != "" {
configEntry, exists := doc.Configs[cataloger.Config]
if !exists {
errors = append(errors, fmt.Sprintf("Cataloger %q references config %q which doesn't exist", cataloger.Name, cataloger.Config))
continue
}
// build a set of valid config field names
validFields := make(map[string]bool)
for _, field := range configEntry.Fields {
validFields[field.Key] = true
}
// validate each capability field
for _, capField := range cataloger.Capabilities {
// check conditions for config field references
for _, condition := range capField.Conditions {
for fieldName := range condition.When {
if !validFields[fieldName] {
errors = append(errors,
fmt.Sprintf("Cataloger %q capability field %q references config field %q which doesn't exist in config struct %q",
cataloger.Name, capField.Name, fieldName, cataloger.Config))
}
}
}
}
} else if len(cataloger.Capabilities) > 0 {
// cataloger has CapabilitiesV2 with conditions but no config - check if any conditions reference fields
for _, capField := range cataloger.Capabilities {
if len(capField.Conditions) > 0 {
for _, condition := range capField.Conditions {
if len(condition.When) > 0 {
errors = append(errors,
fmt.Sprintf("Cataloger %q capability field %q has conditions but cataloger has no config struct",
cataloger.Name, capField.Name))
break
}
}
}
}
}
}
// check parser-level CapabilitiesV2 (for generic catalogers)
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
if len(parser.Capabilities) > 0 {
// load the cataloger's config struct if it has one
if cataloger.Config != "" {
configEntry, exists := doc.Configs[cataloger.Config]
if !exists {
errors = append(errors, fmt.Sprintf("Cataloger %q references config %q which doesn't exist", cataloger.Name, cataloger.Config))
continue
}
// build a set of valid config field names
validFields := make(map[string]bool)
for _, field := range configEntry.Fields {
validFields[field.Key] = true
}
// validate each capability field
for _, capField := range parser.Capabilities {
// check conditions for config field references
for _, condition := range capField.Conditions {
for fieldName := range condition.When {
if !validFields[fieldName] {
errors = append(errors,
fmt.Sprintf("Parser %q/%s capability field %q references config field %q which doesn't exist in config struct %q",
cataloger.Name, parser.ParserFunction, capField.Name, fieldName, cataloger.Config))
}
}
}
}
} else {
// parser has CapabilitiesV2 with conditions but cataloger has no config
for _, capField := range parser.Capabilities {
if len(capField.Conditions) > 0 {
for _, condition := range capField.Conditions {
if len(condition.When) > 0 {
errors = append(errors,
fmt.Sprintf("Parser %q/%s capability field %q has conditions but cataloger has no config struct",
cataloger.Name, parser.ParserFunction, capField.Name))
break
}
}
}
}
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "CapabilityV2 config field reference validation failed", strings.Join(errors, "\n"))
}
}
// TestCapabilityFieldNaming validates that capability field names follow known patterns
func TestCapabilityFieldNaming(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// define known capability field paths
knownFields := strset.New(
"license",
"dependency.depth",
"dependency.edges",
"dependency.kinds",
"package_manager.files.listing",
"package_manager.files.digests",
"package_manager.package_integrity_hash",
)
// collect all validation errors/warnings
var errors []string
// check cataloger-level CapabilitiesV2
for _, cataloger := range doc.Catalogers {
if cataloger.Type == "custom" && len(cataloger.Capabilities) > 0 {
for _, capField := range cataloger.Capabilities {
if !knownFields.Has(capField.Name) {
errors = append(errors,
fmt.Sprintf("Cataloger %q uses unknown capability field %q - may be a typo or new field not in known list",
cataloger.Name, capField.Name))
}
}
}
// check parser-level CapabilitiesV2
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
if len(parser.Capabilities) > 0 {
for _, capField := range parser.Capabilities {
if !knownFields.Has(capField.Name) {
errors = append(errors,
fmt.Sprintf("Parser %q/%s uses unknown capability field %q - may be a typo or new field not in known list",
cataloger.Name, parser.ParserFunction, capField.Name))
}
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Capability field naming validation failed", strings.Join(errors, "\n"))
}
}
// TestCapabilityValueTypes validates that capability field values match expected types
func TestCapabilityValueTypes(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors
var errors []string
// validate cataloger-level CapabilitiesV2
for _, cataloger := range doc.Catalogers {
if cataloger.Type == "custom" && len(cataloger.Capabilities) > 0 {
for _, capField := range cataloger.Capabilities {
// validate default value type
err := validateCapabilityValueType(capField.Name, capField.Default)
if err != nil {
errors = append(errors,
fmt.Sprintf("Cataloger %q capability field %q default value: %v",
cataloger.Name, capField.Name, err))
}
// validate condition value types
for i, condition := range capField.Conditions {
err := validateCapabilityValueType(capField.Name, condition.Value)
if err != nil {
errors = append(errors,
fmt.Sprintf("Cataloger %q capability field %q condition %d value: %v",
cataloger.Name, capField.Name, i, err))
}
}
}
}
// validate parser-level CapabilitiesV2
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
if len(parser.Capabilities) > 0 {
for _, capField := range parser.Capabilities {
// validate default value type
err := validateCapabilityValueType(capField.Name, capField.Default)
if err != nil {
errors = append(errors,
fmt.Sprintf("Parser %q/%s capability field %q default value: %v",
cataloger.Name, parser.ParserFunction, capField.Name, err))
}
// validate condition value types
for i, condition := range capField.Conditions {
err := validateCapabilityValueType(capField.Name, condition.Value)
if err != nil {
errors = append(errors,
fmt.Sprintf("Parser %q/%s capability field %q condition %d value: %v",
cataloger.Name, parser.ParserFunction, capField.Name, i, err))
}
}
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Capability value type validation failed", strings.Join(errors, "\n"))
}
}
// validateCapabilityValueType checks if a value matches the expected type for a capability field
func validateCapabilityValueType(fieldPath string, value interface{}) error {
if value == nil {
return nil // nil is acceptable
}
switch fieldPath {
case "license",
"package_manager.files.listing",
"package_manager.files.digests",
"package_manager.package_integrity_hash":
// expect bool
if _, ok := value.(bool); !ok {
return fmt.Errorf("expected bool, got %T", value)
}
case "dependency.depth", "dependency.kinds":
// expect []string or []interface{} that can be converted to []string
switch v := value.(type) {
case []string:
// ok
case []interface{}:
// check each element is a string
for i, elem := range v {
if _, ok := elem.(string); !ok {
return fmt.Errorf("expected []string, but element %d is %T", i, elem)
}
}
default:
return fmt.Errorf("expected []string, got %T", value)
}
case "dependency.edges":
// expect string
if _, ok := value.(string); !ok {
return fmt.Errorf("expected string, got %T", value)
}
default:
// unknown field, skip validation
return nil
}
return nil
}
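
// for reference, a minimal sketch of values that pass validateCapabilityValueType
// (the concrete values are illustrative, not taken from packages.yaml):
//
//	license: true                            # bool
//	dependency.depth: ["direct", "indirect"] # []string
//	dependency.edges: "complete"             # string
//	package_manager.files.digests: false     # bool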
// TestMetadataTypesHaveJSONSchemaTypes validates that metadata_types and json_schema_types are synchronized
// in packages.yaml - every metadata type should have a corresponding json_schema_type with correct conversion
func TestMetadataTypesHaveJSONSchemaTypes(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors
var errors []string
// validate cataloger-level types (custom catalogers)
for _, cataloger := range doc.Catalogers {
if cataloger.Type == "custom" {
if len(cataloger.MetadataTypes) > 0 {
// verify counts match
if len(cataloger.MetadataTypes) != len(cataloger.JSONSchemaTypes) {
errors = append(errors,
fmt.Sprintf("Cataloger %q has %d metadata_types but %d json_schema_types (counts must match)",
cataloger.Name, len(cataloger.MetadataTypes), len(cataloger.JSONSchemaTypes)))
continue
}
// verify each metadata_type converts to its corresponding json_schema_type
for i, metadataType := range cataloger.MetadataTypes {
expectedJSONSchemaType := convertMetadataTypeToJSONSchemaType(metadataType)
if expectedJSONSchemaType == "" {
errors = append(errors,
fmt.Sprintf("Cataloger %q metadata_type[%d] %q could not be converted to json_schema_type (not found in packagemetadata registry)",
cataloger.Name, i, metadataType))
continue
}
actualJSONSchemaType := cataloger.JSONSchemaTypes[i]
if expectedJSONSchemaType != actualJSONSchemaType {
errors = append(errors,
fmt.Sprintf("Cataloger %q metadata_type[%d] %q should convert to json_schema_type %q but found %q",
cataloger.Name, i, metadataType, expectedJSONSchemaType, actualJSONSchemaType))
}
}
}
}
// validate parser-level types (generic catalogers)
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
if len(parser.MetadataTypes) > 0 {
// verify counts match
if len(parser.MetadataTypes) != len(parser.JSONSchemaTypes) {
errors = append(errors,
fmt.Sprintf("Parser %q/%s has %d metadata_types but %d json_schema_types (counts must match)",
cataloger.Name, parser.ParserFunction, len(parser.MetadataTypes), len(parser.JSONSchemaTypes)))
continue
}
// verify each metadata_type converts to its corresponding json_schema_type
for i, metadataType := range parser.MetadataTypes {
expectedJSONSchemaType := convertMetadataTypeToJSONSchemaType(metadataType)
if expectedJSONSchemaType == "" {
errors = append(errors,
fmt.Sprintf("Parser %q/%s metadata_type[%d] %q could not be converted to json_schema_type (not found in packagemetadata registry)",
cataloger.Name, parser.ParserFunction, i, metadataType))
continue
}
actualJSONSchemaType := parser.JSONSchemaTypes[i]
if expectedJSONSchemaType != actualJSONSchemaType {
errors = append(errors,
fmt.Sprintf("Parser %q/%s metadata_type[%d] %q should convert to json_schema_type %q but found %q",
cataloger.Name, parser.ParserFunction, i, metadataType, expectedJSONSchemaType, actualJSONSchemaType))
}
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Metadata types and JSON schema types validation failed", strings.Join(errors, "\n"))
}
}
// convertMetadataTypeToJSONSchemaType converts a metadata type (e.g., "pkg.AlpmDBEntry") to its JSON schema type (e.g., "AlpmDbEntry")
func convertMetadataTypeToJSONSchemaType(metadataType string) string {
jsonName := packagemetadata.JSONNameFromString(metadataType)
if jsonName == "" {
return ""
}
return packagemetadata.ToUpperCamelCase(jsonName)
}
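
// a worked example of the conversion above (the intermediate JSON name is an assumption
// about the packagemetadata registry):
//
//	"pkg.AlpmDBEntry" -> JSONNameFromString -> "alpm-db-entry" -> ToUpperCamelCase -> "AlpmDbEntry"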
// loadConfigStructFields loads the config struct definition from source code using AST parsing
func loadConfigStructFields(repoRoot, configName string) (map[string]string, error) {
// configName format: "package.StructName" (e.g., "golang.CatalogerConfig")
parts := strings.Split(configName, ".")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid config name format: %q", configName)
}
packageName := parts[0]
structName := parts[1]
// find the package directory
packageDir := filepath.Join(repoRoot, "syft", "pkg", "cataloger", packageName)
if _, err := os.Stat(packageDir); os.IsNotExist(err) {
return nil, fmt.Errorf("package directory not found: %s", packageDir)
}
// parse all .go files in the package
files, err := filepath.Glob(filepath.Join(packageDir, "*.go"))
if err != nil {
return nil, err
}
for _, filePath := range files {
if strings.HasSuffix(filePath, "_test.go") {
continue
}
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, filePath, nil, 0)
if err != nil {
continue
}
// find the struct definition
fields := findStructFields(file, structName)
if len(fields) > 0 {
return fields, nil
}
}
return nil, fmt.Errorf("config struct %q not found in package %q", structName, packageName)
}
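
// minimal usage sketch (the returned field names shown are hypothetical):
//
//	fields, err := loadConfigStructFields(repoRoot, "golang.CatalogerConfig")
//	// on success, fields maps Go field names to type strings, e.g.
//	// {"SearchRemoteLicenses": "bool", "Proxies": "[]string"}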
// findStructFields extracts field names and types from a struct definition
func findStructFields(file *ast.File, structName string) map[string]string {
fields := make(map[string]string)
ast.Inspect(file, func(n ast.Node) bool {
// look for type declarations
typeSpec, ok := n.(*ast.TypeSpec)
if !ok || typeSpec.Name.Name != structName {
return true
}
// check if it's a struct type
structType, ok := typeSpec.Type.(*ast.StructType)
if !ok {
return false
}
// extract field names and types
for _, field := range structType.Fields.List {
if len(field.Names) == 0 {
continue // embedded field
}
fieldName := field.Names[0].Name
fieldType := getTypeName(field.Type)
fields[fieldName] = fieldType
}
return false
})
return fields
}
// getTypeName extracts a string representation of a type
func getTypeName(expr ast.Expr) string {
switch t := expr.(type) {
case *ast.Ident:
return t.Name
case *ast.SelectorExpr:
return fmt.Sprintf("%s.%s", getTypeName(t.X), t.Sel.Name)
case *ast.ArrayType:
return fmt.Sprintf("[]%s", getTypeName(t.Elt))
case *ast.MapType:
return fmt.Sprintf("map[%s]%s", getTypeName(t.Key), getTypeName(t.Value))
case *ast.StarExpr:
return fmt.Sprintf("*%s", getTypeName(t.X))
default:
return "unknown"
}
}
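
// examples of getTypeName results for common AST shapes:
//
//	*ast.Ident ("string")             -> "string"
//	*ast.SelectorExpr ("pkg.License") -> "pkg.License"
//	*ast.ArrayType ("[]string")       -> "[]string"
//	*ast.MapType ("map[string]bool")  -> "map[string]bool"
//	*ast.StarExpr ("*AlpmDBEntry")    -> "*AlpmDBEntry"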
// findMetadataStructFile finds the Go file containing a metadata struct definition
// searches in syft/pkg/*.go for the given struct name
// also handles type aliases and returns the underlying struct name
func findMetadataStructFile(repoRoot, structName string) (filePath string, actualStructName string, err error) {
pkgDir := filepath.Join(repoRoot, "syft", "pkg")
files, err := filepath.Glob(filepath.Join(pkgDir, "*.go"))
if err != nil {
return "", "", err
}
for _, fpath := range files {
if strings.HasSuffix(fpath, "_test.go") {
continue
}
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, fpath, nil, 0)
if err != nil {
continue
}
// check if this file contains the struct definition or type alias
found := false
var resolvedName string
ast.Inspect(file, func(n ast.Node) bool {
typeSpec, ok := n.(*ast.TypeSpec)
if !ok || typeSpec.Name.Name != structName {
return true
}
// check if it's a struct type
if _, ok := typeSpec.Type.(*ast.StructType); ok {
found = true
resolvedName = structName
return false
}
// check if it's a type alias (e.g., type DpkgArchiveEntry DpkgDBEntry)
if ident, ok := typeSpec.Type.(*ast.Ident); ok {
found = true
resolvedName = ident.Name
return false
}
return true
})
if found {
// if it's a type alias, recursively find the underlying struct
if resolvedName != structName {
return findMetadataStructFile(repoRoot, resolvedName)
}
return fpath, structName, nil
}
}
return "", "", fmt.Errorf("struct %q not found in syft/pkg/", structName)
}
// parseEvidenceReference parses an evidence string like "StructName.Field" or "StructName.Field1.Field2"
// into struct name and field path components
// examples:
// - "CondaMetaPackage.MD5" -> ("CondaMetaPackage", []string{"MD5"})
// - "CondaMetaPackage.PathsData.Paths" -> ("CondaMetaPackage", []string{"PathsData", "Paths"})
// - "AlpmDBEntry.Files[].Digest" -> ("AlpmDBEntry", []string{"Files", "[]", "Digest"})
func parseEvidenceReference(evidence string) (structName string, fieldPath []string, err error) {
parts := strings.Split(evidence, ".")
if len(parts) < 2 {
return "", nil, fmt.Errorf("invalid evidence format: %q (expected at least StructName.Field)", evidence)
}
structName = parts[0]
// process the remaining parts, splitting on [] for array notation
for _, part := range parts[1:] {
// check if this part contains array notation
if strings.Contains(part, "[]") {
// split on [] - e.g., "Files[]" becomes ["Files", ""]
subparts := strings.Split(part, "[]")
if len(subparts) > 0 && subparts[0] != "" {
fieldPath = append(fieldPath, subparts[0])
}
fieldPath = append(fieldPath, "[]")
} else {
fieldPath = append(fieldPath, part)
}
}
return structName, fieldPath, nil
}
// validateFieldPath validates that a field path exists in a struct definition
// handles simple fields, nested fields, and array element fields
// fieldPath can contain "[]" to indicate array dereferencing
func validateFieldPath(repoRoot, structName string, fieldPath []string) error {
if len(fieldPath) == 0 {
return fmt.Errorf("empty field path")
}
// find the file containing the struct (handles type aliases)
filePath, actualStructName, err := findMetadataStructFile(repoRoot, structName)
if err != nil {
return err
}
// parse the file
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, filePath, nil, 0)
if err != nil {
return fmt.Errorf("failed to parse %s: %w", filePath, err)
}
// find the struct fields using the actual struct name
fields := findStructFields(file, actualStructName)
if len(fields) == 0 {
return fmt.Errorf("struct %q not found in %s", actualStructName, filePath)
}
// validate each component of the field path
currentFields := fields
currentStructName := actualStructName
for i, component := range fieldPath {
if component == "[]" {
// array dereference - this is a no-op for validation
// the previous component should have been an array type
continue
}
fieldType, exists := currentFields[component]
if !exists {
return fmt.Errorf("field %q not found in struct %q (path: %s)", component, currentStructName, strings.Join(fieldPath[:i+1], "."))
}
// if there are more components, we need to navigate to the next struct
if i < len(fieldPath)-1 {
// extract the actual type name, removing pointer/array/slice markers
typeName := strings.TrimPrefix(fieldType, "*")
typeName = strings.TrimPrefix(typeName, "[]")
// primitive and external types that can't be resolved below are allowed to pass
// without further field validation
if strings.Contains(typeName, ".") {
// qualified type like "pkg.Something" - extract just "Something"
parts := strings.Split(typeName, ".")
typeName = parts[len(parts)-1]
}
// try to find the nested struct (handles type aliases)
nestedFilePath, nestedStructName, err := findMetadataStructFile(repoRoot, typeName)
if err != nil {
// if we can't find the struct, it might be a primitive or external type
// we'll allow this to pass
continue
}
nestedFset := token.NewFileSet()
nestedFile, err := parser.ParseFile(nestedFset, nestedFilePath, nil, 0)
if err != nil {
continue
}
currentFields = findStructFields(nestedFile, nestedStructName)
currentStructName = nestedStructName
if len(currentFields) == 0 {
// couldn't load the nested struct, but we found the field, so allow it
continue
}
}
}
return nil
}
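
// to illustrate the walk above with the evidence path "AlpmDBEntry.Files[].Digest":
// "Files" is looked up in AlpmDBEntry, "[]" is skipped as an array dereference, the
// element type has its "[]"/"*" markers and any package qualifier stripped, and
// "Digest" is then looked up in the resolved element struct's fields.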
// TestCapabilityEvidenceFieldReferences validates that evidence field references in capabilities
// actually exist on their corresponding metadata structs
func TestCapabilityEvidenceFieldReferences(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all evidence field references
type evidenceRef struct {
catalogerName string
parserFunction string // empty for cataloger-level
capabilityName string
evidenceField string
}
var allReferences []evidenceRef
// collect from cataloger-level capabilities (custom catalogers)
for _, cataloger := range doc.Catalogers {
if cataloger.Type == "custom" && len(cataloger.Capabilities) > 0 {
for _, capField := range cataloger.Capabilities {
for _, evidence := range capField.Evidence {
allReferences = append(allReferences, evidenceRef{
catalogerName: cataloger.Name,
capabilityName: capField.Name,
evidenceField: evidence,
})
}
}
}
// collect from parser-level capabilities (generic catalogers)
if cataloger.Type == "generic" {
for _, parser := range cataloger.Parsers {
if len(parser.Capabilities) > 0 {
for _, capField := range parser.Capabilities {
for _, evidence := range capField.Evidence {
allReferences = append(allReferences, evidenceRef{
catalogerName: cataloger.Name,
parserFunction: parser.ParserFunction,
capabilityName: capField.Name,
evidenceField: evidence,
})
}
}
}
}
}
}
// validate each evidence reference
for _, ref := range allReferences {
ref := ref // capture for subtest
// create test name
testName := ref.catalogerName
if ref.parserFunction != "" {
testName = fmt.Sprintf("%s/%s", ref.catalogerName, ref.parserFunction)
}
testName = fmt.Sprintf("%s/%s/%s", testName, ref.capabilityName, ref.evidenceField)
t.Run(testName, func(t *testing.T) {
// parse the evidence reference
structName, fieldPath, err := parseEvidenceReference(ref.evidenceField)
require.NoError(t, err, "failed to parse evidence reference")
// validate the field path exists
err = validateFieldPath(repoRoot, structName, fieldPath)
require.NoError(t, err, "evidence field reference is invalid")
})
}
}
// TestDetectorConfigFieldReferences validates that config field names referenced in detector
// conditions actually exist in the cataloger's config struct
func TestDetectorConfigFieldReferences(t *testing.T) {
repoRoot, err := RepoRoot()
require.NoError(t, err)
// load the packages.yaml
doc, _, err := loadCapabilities(filepath.Join(repoRoot, "internal/capabilities/packages.yaml"))
require.NoError(t, err)
// collect all validation errors before failing
var errors []string
// check each cataloger's detectors
for _, cataloger := range doc.Catalogers {
if cataloger.Type != "custom" {
continue // only custom catalogers have detectors
}
for detectorIdx, detector := range cataloger.Detectors {
// if detector has no conditions, skip validation
if len(detector.Conditions) == 0 {
continue
}
// detector has conditions - cataloger must have a config
if cataloger.Config == "" {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d has conditions but cataloger has no config struct",
cataloger.Name, detectorIdx))
continue
}
// load the cataloger's config struct
configEntry, exists := doc.Configs[cataloger.Config]
if !exists {
errors = append(errors,
fmt.Sprintf("Cataloger %q references config %q which doesn't exist",
cataloger.Name, cataloger.Config))
continue
}
// build a set of valid config field names
validFields := make(map[string]bool)
for _, field := range configEntry.Fields {
validFields[field.Key] = true
}
// validate each condition
for condIdx, condition := range detector.Conditions {
for fieldName := range condition.When {
if !validFields[fieldName] {
errors = append(errors,
fmt.Sprintf("Cataloger %q detector %d condition %d references config field %q which doesn't exist in config struct %q",
cataloger.Name, detectorIdx, condIdx, fieldName, cataloger.Config))
}
}
}
}
}
// report all errors at once
if len(errors) > 0 {
require.Fail(t, "Detector config field reference validation failed", strings.Join(errors, "\n"))
}
}