From 538430d65d2ccb6d86424130244639cc0a418833 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Thu, 30 Oct 2025 13:19:42 -0400 Subject: [PATCH] describe cataloger capabilities via test observations (#4318) Signed-off-by: Alex Goodman --- .../capabilities/pkgtestobservation/model.go | 46 ++ syft/pkg/cataloger/.gitignore | 2 + .../internal/pkgtest/metadata_tracker.go | 514 ++++++++++++++++++ .../internal/pkgtest/observing_resolver.go | 191 ++++--- .../internal/pkgtest/test_generic_parser.go | 176 ++++++ .../javascript/parse_yarn_lock_test.go | 6 +- syft/pkg/type.go | 4 + 7 files changed, 854 insertions(+), 85 deletions(-) create mode 100644 internal/capabilities/pkgtestobservation/model.go create mode 100644 syft/pkg/cataloger/.gitignore create mode 100644 syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go diff --git a/internal/capabilities/pkgtestobservation/model.go b/internal/capabilities/pkgtestobservation/model.go new file mode 100644 index 000000000..77fbcb409 --- /dev/null +++ b/internal/capabilities/pkgtestobservation/model.go @@ -0,0 +1,46 @@ +package pkgtestobservation + +import "time" + +// Observations represents capability observations during testing +type Observations struct { + License bool `json:"license"` + Relationships Relationship `json:"relationships"` + FileListing Count `json:"file_listing"` + FileDigests Count `json:"file_digests"` + IntegrityHash Count `json:"integrity_hash"` +} + +// Relationship tracks dependency relationship observations +type Relationship struct { + Found bool `json:"found"` + Count int `json:"count"` +} + +// Count tracks whether a capability was found and how many times +type Count struct { + Found bool `json:"found"` + Count int `json:"count"` +} + +// Test is the root structure for test-observations.json +type Test struct { + Package string `json:"package"` + UpdatedAt time.Time `json:"updated_at"` + Catalogers map[string]*Cataloger `json:"catalogers"` + Parsers map[string]*Parser `json:"parsers"` +} + +// 
Parser captures all observations for a parser +type Parser struct { + MetadataTypes []string `json:"metadata_types"` + PackageTypes []string `json:"package_types"` + Observations Observations `json:"observations"` +} + +// Cataloger captures all observations for a cataloger +type Cataloger struct { + MetadataTypes []string `json:"metadata_types"` + PackageTypes []string `json:"package_types"` + Observations Observations `json:"observations"` +} diff --git a/syft/pkg/cataloger/.gitignore b/syft/pkg/cataloger/.gitignore new file mode 100644 index 000000000..752bd7a6c --- /dev/null +++ b/syft/pkg/cataloger/.gitignore @@ -0,0 +1,2 @@ +# these are generated by pkgtest helpers, no need to check them in +**/test-fixtures/test-observations.json \ No newline at end of file diff --git a/syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go b/syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go new file mode 100644 index 000000000..378a025c8 --- /dev/null +++ b/syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go @@ -0,0 +1,514 @@ +// Package pkgtest provides test helpers for cataloger and parser testing, +// including automatic observation tracking for capability documentation. +package pkgtest + +import ( + "encoding/json" + "os" + "path/filepath" + "reflect" + "sort" + "sync" + "time" + + "github.com/anchore/syft/internal/capabilities/pkgtestobservation" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" +) + +var ( + globalTracker *MetadataTracker + globalTrackerOnce sync.Once + + // commonPackageIntegrityFields are common field names used to store integrity hashes in package metadata. + // TODO: this is a best-effort list and may need to be expanded as new package types are added. Don't depend on this list to catch everything - it's only for test validation. 
+ commonPackageIntegrityFields = []string{ + "Integrity", "Checksum", "H1Digest", + "OutputHash", "PkgHash", "ContentHash", + "PkgHashExt", "Hash", "IntegrityHash", + } +) + +// MetadataTracker collects metadata type and package type usage during test execution +type MetadataTracker struct { + mu sync.Mutex + parserData map[string]map[string]map[string]bool // package -> parser -> metadata types (set) + catalogerData map[string]map[string]bool // cataloger -> metadata types (set) + parserPackageTypes map[string]map[string]map[string]bool // package -> parser -> package types (set) + catalogerPackageTypes map[string]map[string]bool // cataloger -> package types (set) + + // unified observations for the current test package + observations *pkgtestobservation.Test +} + +// getTracker returns the singleton metadata tracker +func getTracker() *MetadataTracker { + globalTrackerOnce.Do(func() { + globalTracker = &MetadataTracker{ + parserData: make(map[string]map[string]map[string]bool), + catalogerData: make(map[string]map[string]bool), + parserPackageTypes: make(map[string]map[string]map[string]bool), + catalogerPackageTypes: make(map[string]map[string]bool), + } + }) + return globalTracker +} + +// RecordParser records a metadata type usage for a parser function +func (t *MetadataTracker) RecordParser(packageName, parserFunction, metadataType string) { + if packageName == "" || parserFunction == "" || metadataType == "" { + return + } + + // filter out non-metadata types + if metadataType == "pkg.Package" || metadataType == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + if t.parserData[packageName] == nil { + t.parserData[packageName] = make(map[string]map[string]bool) + } + + if t.parserData[packageName][parserFunction] == nil { + t.parserData[packageName][parserFunction] = make(map[string]bool) + } + + t.parserData[packageName][parserFunction][metadataType] = true +} + +// RecordCataloger records a metadata type usage for a cataloger +func (t 
*MetadataTracker) RecordCataloger(catalogerName, metadataType string) { + if catalogerName == "" || metadataType == "" { + return + } + + // filter out non-metadata types + if metadataType == "pkg.Package" || metadataType == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + if t.catalogerData[catalogerName] == nil { + t.catalogerData[catalogerName] = make(map[string]bool) + } + + t.catalogerData[catalogerName][metadataType] = true +} + +// RecordParserPackageType records a package type usage for a parser function +func (t *MetadataTracker) RecordParserPackageType(packageName, parserFunction, pkgType string) { + if packageName == "" || parserFunction == "" || pkgType == "" { + return + } + + // filter out unknown types + if pkgType == pkg.UnknownPkg.String() || pkgType == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + if t.parserPackageTypes[packageName] == nil { + t.parserPackageTypes[packageName] = make(map[string]map[string]bool) + } + + if t.parserPackageTypes[packageName][parserFunction] == nil { + t.parserPackageTypes[packageName][parserFunction] = make(map[string]bool) + } + + t.parserPackageTypes[packageName][parserFunction][pkgType] = true +} + +// RecordCatalogerPackageType records a package type usage for a cataloger +func (t *MetadataTracker) RecordCatalogerPackageType(catalogerName, pkgType string) { + if catalogerName == "" || pkgType == "" { + return + } + + // filter out unknown types + if pkgType == pkg.UnknownPkg.String() || pkgType == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + if t.catalogerPackageTypes[catalogerName] == nil { + t.catalogerPackageTypes[catalogerName] = make(map[string]bool) + } + + t.catalogerPackageTypes[catalogerName][pkgType] = true +} + +// RecordParserPackageMetadata extracts and records metadata type and package type from a package for a parser +func (t *MetadataTracker) RecordParserPackageMetadata(packageName, parserFunction string, p pkg.Package) { + if p.Metadata != nil { + 
metadataType := getMetadataTypeName(p.Metadata) + if metadataType != "" { + t.RecordParser(packageName, parserFunction, metadataType) + } + } + + // record package type + t.RecordParserPackageType(packageName, parserFunction, string(p.Type)) +} + +// RecordCatalogerPackageMetadata extracts and records metadata type and package type from a package for a cataloger +func (t *MetadataTracker) RecordCatalogerPackageMetadata(catalogerName string, p pkg.Package) { + if p.Metadata != nil { + metadataType := getMetadataTypeName(p.Metadata) + if metadataType != "" { + t.RecordCataloger(catalogerName, metadataType) + } + } + + // record package type + t.RecordCatalogerPackageType(catalogerName, string(p.Type)) +} + +// aggregateObservations aggregates package and relationship observations into metadata types, package types, and observations. +// this is used by both parser and cataloger observation recording. +func aggregateObservations( + metadataTypes *[]string, + packageTypes *[]string, + obs *pkgtestobservation.Observations, + pkgs []pkg.Package, + relationships []artifact.Relationship, +) { + // aggregate observations from packages + for _, p := range pkgs { + // metadata types + if p.Metadata != nil { + metadataType := getMetadataTypeName(p.Metadata) + if metadataType != "" && !contains(*metadataTypes, metadataType) { + *metadataTypes = append(*metadataTypes, metadataType) + } + } + + // package types + pkgType := string(p.Type) + if pkgType != "" && pkgType != pkg.UnknownPkg.String() && !contains(*packageTypes, pkgType) { + *packageTypes = append(*packageTypes, pkgType) + } + + // license observation + if !p.Licenses.Empty() { + obs.License = true + } + + // file listing observation + if fileOwner, ok := p.Metadata.(pkg.FileOwner); ok { + files := fileOwner.OwnedFiles() + if len(files) > 0 { + obs.FileListing.Found = true + obs.FileListing.Count += len(files) + } + } + + // file digests observation + if hasFileDigests(p.Metadata) { + obs.FileDigests.Found = true + 
obs.FileDigests.Count++ + } + + // integrity hash observation + if hasIntegrityHash(p.Metadata) { + obs.IntegrityHash.Found = true + obs.IntegrityHash.Count++ + } + } + + // relationship observations + depCount := countDependencyRelationships(relationships) + if depCount > 0 { + obs.Relationships.Found = true + obs.Relationships.Count = depCount + } + + // sort arrays for consistency + sort.Strings(*metadataTypes) + sort.Strings(*packageTypes) +} + +// ensureObservationsInitialized ensures t.observations is initialized and package name is set. +// must be called with t.mu locked. +func (t *MetadataTracker) ensureObservationsInitialized(packageName string) { + if t.observations == nil { + t.observations = &pkgtestobservation.Test{ + Package: packageName, + Catalogers: make(map[string]*pkgtestobservation.Cataloger), + Parsers: make(map[string]*pkgtestobservation.Parser), + } + return + } + + // update package name if not set (for the first test) or if it matches (for subsequent tests in same package) + if t.observations.Package == "" || t.observations.Package == packageName { + t.observations.Package = packageName + } +} + +// getOrCreateParser gets an existing parser observation or creates a new one. +// must be called with t.mu locked. +func (t *MetadataTracker) getOrCreateParser(parserFunction string) *pkgtestobservation.Parser { + if t.observations.Parsers[parserFunction] == nil { + t.observations.Parsers[parserFunction] = &pkgtestobservation.Parser{ + MetadataTypes: []string{}, + PackageTypes: []string{}, + Observations: pkgtestobservation.Observations{}, + } + } + return t.observations.Parsers[parserFunction] +} + +// getOrCreateCataloger gets an existing cataloger observation or creates a new one. +// must be called with t.mu locked. 
+func (t *MetadataTracker) getOrCreateCataloger(catalogerName string) *pkgtestobservation.Cataloger { + if t.observations.Catalogers[catalogerName] == nil { + t.observations.Catalogers[catalogerName] = &pkgtestobservation.Cataloger{ + MetadataTypes: []string{}, + PackageTypes: []string{}, + Observations: pkgtestobservation.Observations{}, + } + } + return t.observations.Catalogers[catalogerName] +} + +// RecordParserObservations records comprehensive observations for a parser. +func (t *MetadataTracker) RecordParserObservations( + packageName, parserFunction string, + pkgs []pkg.Package, + relationships []artifact.Relationship, +) { + if packageName == "" || parserFunction == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + t.ensureObservationsInitialized(packageName) + parser := t.getOrCreateParser(parserFunction) + aggregateObservations(&parser.MetadataTypes, &parser.PackageTypes, &parser.Observations, pkgs, relationships) +} + +// RecordCatalogerObservations records comprehensive observations for a cataloger. +func (t *MetadataTracker) RecordCatalogerObservations( + packageName, catalogerName string, + pkgs []pkg.Package, + relationships []artifact.Relationship, +) { + if packageName == "" || catalogerName == "" { + return + } + + t.mu.Lock() + defer t.mu.Unlock() + + t.ensureObservationsInitialized(packageName) + cataloger := t.getOrCreateCataloger(catalogerName) + aggregateObservations(&cataloger.MetadataTypes, &cataloger.PackageTypes, &cataloger.Observations, pkgs, relationships) +} + +// ===== Metadata Type and Capability Detection ===== +// These functions use reflection to inspect package metadata and detect capabilities. +// They are best-effort and may not catch all cases. + +// getMetadataTypeName returns the fully qualified type name of metadata (e.g., "pkg.ApkDBEntry"). +// extracts just the last package path segment to keep names concise. 
+func getMetadataTypeName(metadata interface{}) string { + if metadata == nil { + return "" + } + + t := reflect.TypeOf(metadata) + if t == nil { + return "" + } + + // handle pointers + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + + // return pkg path + type name (e.g., "pkg.ApkDBEntry") + if t.PkgPath() != "" { + // extract just "pkg" from "github.com/anchore/syft/syft/pkg" + pkgPath := lastPathSegment(t.PkgPath()) + return pkgPath + "." + t.Name() + } + + return t.Name() +} + +// lastPathSegment extracts the last segment from a package path. +// for example: "github.com/anchore/syft/syft/pkg" -> "pkg" +func lastPathSegment(path string) string { + for i := len(path) - 1; i >= 0; i-- { + if path[i] == '/' { + return path[i+1:] + } + } + return path +} + +// hasIntegrityHash checks if metadata contains an integrity hash field. +// note: this uses a best-effort approach checking common field names. +// DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only. +func hasIntegrityHash(metadata interface{}) bool { + v := dereferenceToStruct(metadata) + if !v.IsValid() || v.Kind() != reflect.Struct { + return false + } + + for _, fieldName := range commonPackageIntegrityFields { + if hasPopulatedStringField(v, fieldName) { + return true + } + } + return false +} + +// hasFileDigests checks if metadata contains file records with digests. +// note: uses a best-effort approach for detection. +// DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only. 
func hasFileDigests(metadata interface{}) bool {
	structVal := dereferenceToStruct(metadata)
	if !structVal.IsValid() || structVal.Kind() != reflect.Struct {
		return false
	}

	// only a slice-typed field named "Files" is considered
	records := structVal.FieldByName("Files")
	if !records.IsValid() || records.Kind() != reflect.Slice {
		return false
	}

	// a single file record with a populated Digest is enough
	for idx, total := 0, records.Len(); idx < total; idx++ {
		if hasPopulatedDigest(records.Index(idx)) {
			return true
		}
	}
	return false
}

// dereferenceToStruct unwraps a single level of pointer indirection and returns the underlying value.
// returns an invalid value if the input is nil or a nil pointer. non-pointer inputs are returned
// as-is, so callers must still check that the resulting kind is a struct.
func dereferenceToStruct(v interface{}) reflect.Value {
	var invalid reflect.Value
	if v == nil {
		return invalid
	}

	val := reflect.ValueOf(v)
	if val.Kind() != reflect.Ptr {
		return val
	}
	if val.IsNil() {
		return invalid
	}
	return val.Elem()
}

// hasPopulatedStringField checks if a struct has a non-empty string field with the given name.
// v must be a struct value.
func hasPopulatedStringField(v reflect.Value, fieldName string) bool {
	field := v.FieldByName(fieldName)
	if !field.IsValid() || field.Kind() != reflect.String {
		return false
	}
	return field.String() != ""
}

// hasPopulatedDigest checks if a file record (a struct or pointer to a struct) has a populated Digest field.
func hasPopulatedDigest(fileRecord reflect.Value) bool {
	record := dereferenceToStruct(fileRecord.Interface())
	if !record.IsValid() || record.Kind() != reflect.Struct {
		return false
	}

	digest := record.FieldByName("Digest")
	if !digest.IsValid() {
		return false
	}

	// "populated" means: a non-empty string, a non-nil pointer, or a non-zero struct
	switch kind := digest.Kind(); kind {
	case reflect.String:
		return digest.String() != ""
	case reflect.Ptr:
		return !digest.IsNil()
	case reflect.Struct:
		return !digest.IsZero()
	default:
		return false
	}
}

// ===== Utility Functions =====

// countDependencyRelationships counts the number of dependency relationships.
+func countDependencyRelationships(relationships []artifact.Relationship) int { + count := 0 + for _, rel := range relationships { + if rel.Type == artifact.DependencyOfRelationship { + count++ + } + } + return count +} + +// contains checks if a string slice contains a specific string. +func contains(slice []string, item string) bool { + for _, s := range slice { + if s == item { + return true + } + } + return false +} + +// ===== Result Writing ===== + +// WriteResults writes the collected observation data to test-fixtures/test-observations.json. +func (t *MetadataTracker) WriteResults() error { + t.mu.Lock() + defer t.mu.Unlock() + + if t.observations == nil { + // no data to write + return nil + } + + // create output directory + outDir := "test-fixtures" + if err := os.MkdirAll(outDir, 0755); err != nil { + return err + } + + // write unified test-observations.json + t.observations.UpdatedAt = time.Now().UTC() + + filename := filepath.Join(outDir, "test-observations.json") + return writeJSONFile(filename, t.observations) +} + +// writeJSONFile writes data as pretty-printed JSON to the specified path. +func writeJSONFile(path string, data interface{}) error { + file, err := os.Create(path) + if err != nil { + return err + } + defer file.Close() + + encoder := json.NewEncoder(file) + encoder.SetIndent("", " ") + return encoder.Encode(data) +} + +// WriteResultsIfEnabled writes results if tracking is enabled. +// this is typically called via t.Cleanup() in tests. 
+func WriteResultsIfEnabled() error { + tracker := getTracker() + return tracker.WriteResults() +} diff --git a/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go b/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go index a05d0823b..49fc9c51f 100644 --- a/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go +++ b/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go @@ -1,3 +1,5 @@ +// Package pkgtest provides test helpers for cataloger and parser testing, +// including resolver decorators that track file access patterns. package pkgtest import ( @@ -13,29 +15,36 @@ import ( var _ file.Resolver = (*ObservingResolver)(nil) +// ObservingResolver wraps a file.Resolver to observe and track all file access patterns. +// it records what paths were queried, what was returned, and what file contents were read. +// this is useful for validating that catalogers use appropriate glob patterns and don't over-read files. type ObservingResolver struct { decorated file.Resolver - pathQueries map[string][]string - pathResponses []file.Location - contentQueries []file.Location - emptyPathResponses map[string][]string + pathQueries map[string][]string // method name -> list of query patterns + pathResponses []file.Location // all locations successfully returned + contentQueries []file.Location // all locations whose content was read + emptyPathResponses map[string][]string // method name -> paths that returned empty results } +// NewObservingResolver creates a new ObservingResolver that wraps the given resolver. func NewObservingResolver(resolver file.Resolver) *ObservingResolver { return &ObservingResolver{ decorated: resolver, - pathResponses: make([]file.Location, 0), - emptyPathResponses: make(map[string][]string), pathQueries: make(map[string][]string), + pathResponses: make([]file.Location, 0), + contentQueries: make([]file.Location, 0), + emptyPathResponses: make(map[string][]string), } } -// testing helpers... 
+// ===== Test Assertion Helpers ===== +// these methods are used by tests to validate expected file access patterns. +// ObservedPathQuery checks if a specific path pattern was queried. func (r *ObservingResolver) ObservedPathQuery(input string) bool { - for _, vs := range r.pathQueries { - for _, v := range vs { - if v == input { + for _, queries := range r.pathQueries { + for _, query := range queries { + if query == input { return true } } @@ -43,6 +52,7 @@ func (r *ObservingResolver) ObservedPathQuery(input string) bool { return false } +// ObservedPathResponses checks if a specific path was returned in any response. func (r *ObservingResolver) ObservedPathResponses(path string) bool { for _, loc := range r.pathResponses { if loc.RealPath == path { @@ -52,6 +62,7 @@ func (r *ObservingResolver) ObservedPathResponses(path string) bool { return false } +// ObservedContentQueries checks if a specific file's content was read. func (r *ObservingResolver) ObservedContentQueries(path string) bool { for _, loc := range r.contentQueries { if loc.RealPath == path { @@ -61,6 +72,7 @@ func (r *ObservingResolver) ObservedContentQueries(path string) bool { return false } +// AllContentQueries returns a deduplicated list of all file paths whose content was read. func (r *ObservingResolver) AllContentQueries() []string { observed := strset.New() for _, loc := range r.contentQueries { @@ -69,155 +81,166 @@ func (r *ObservingResolver) AllContentQueries() []string { return observed.List() } +// AllPathQueries returns all path query patterns grouped by method name. func (r *ObservingResolver) AllPathQueries() map[string][]string { return r.pathQueries } +// PruneUnfulfilledPathResponses removes specified paths from the unfulfilled requests tracking. +// ignore maps method names to paths that should be ignored for that method. +// ignorePaths lists paths that should be ignored for all methods. 
func (r *ObservingResolver) PruneUnfulfilledPathResponses(ignore map[string][]string, ignorePaths ...string) { - if ignore == nil { - return - } - // remove any paths that were ignored for specific calls - for k, v := range ignore { - results := r.emptyPathResponses[k] - for _, ig := range v { - for i, result := range results { - if result == ig { - results = append(results[:i], results[i+1:]...) - break - } - } - } - if len(results) > 0 { - r.emptyPathResponses[k] = results - } else { - delete(r.emptyPathResponses, k) + // remove paths ignored for specific methods + for methodName, pathsToIgnore := range ignore { + r.emptyPathResponses[methodName] = removeStrings(r.emptyPathResponses[methodName], pathsToIgnore) + if len(r.emptyPathResponses[methodName]) == 0 { + delete(r.emptyPathResponses, methodName) } } - // remove any paths that were ignored for all calls - for _, ig := range ignorePaths { - for k, v := range r.emptyPathResponses { - for i, result := range v { - if result == ig { - v = append(v[:i], v[i+1:]...) - break - } - } - if len(v) > 0 { - r.emptyPathResponses[k] = v - } else { - delete(r.emptyPathResponses, k) + // remove paths ignored for all methods + if len(ignorePaths) > 0 { + for methodName := range r.emptyPathResponses { + r.emptyPathResponses[methodName] = removeStrings(r.emptyPathResponses[methodName], ignorePaths) + if len(r.emptyPathResponses[methodName]) == 0 { + delete(r.emptyPathResponses, methodName) } } } } +// HasUnfulfilledPathRequests returns true if there are any paths that were queried but returned empty. func (r *ObservingResolver) HasUnfulfilledPathRequests() bool { return len(r.emptyPathResponses) > 0 } +// PrettyUnfulfilledPathRequests returns a formatted string of all unfulfilled path requests. 
func (r *ObservingResolver) PrettyUnfulfilledPathRequests() string { - var res string - var keys []string + if len(r.emptyPathResponses) == 0 { + return "" + } + var keys []string for k := range r.emptyPathResponses { keys = append(keys, k) } - sort.Strings(keys) + var result string for _, k := range keys { - res += fmt.Sprintf(" %s: %+v\n", k, r.emptyPathResponses[k]) + result += fmt.Sprintf(" %s: %+v\n", k, r.emptyPathResponses[k]) } - return res + return result } -// For the file path resolver... +// removeStrings removes all occurrences of toRemove from slice. +func removeStrings(slice []string, toRemove []string) []string { + if len(toRemove) == 0 { + return slice + } -func (r *ObservingResolver) addPathQuery(name string, input ...string) { - r.pathQueries[name] = append(r.pathQueries[name], input...) + // create a set for O(1) lookup + removeSet := make(map[string]bool) + for _, s := range toRemove { + removeSet[s] = true + } + + // filter the slice + result := make([]string, 0, len(slice)) + for _, s := range slice { + if !removeSet[s] { + result = append(result, s) + } + } + return result } -func (r *ObservingResolver) addPathResponse(locs ...file.Location) { +// ===== Internal Tracking Helpers ===== + +// recordQuery records a path query for a given method. +func (r *ObservingResolver) recordQuery(methodName string, queries ...string) { + r.pathQueries[methodName] = append(r.pathQueries[methodName], queries...) +} + +// recordResponses records successful path responses and tracks any unfulfilled queries. +func (r *ObservingResolver) recordResponses(methodName string, locs []file.Location, queriedPaths ...string) { r.pathResponses = append(r.pathResponses, locs...) -} -func (r *ObservingResolver) addEmptyPathResponse(name string, locs []file.Location, paths ...string) { - if len(locs) == 0 { - results := r.emptyPathResponses[name] - results = append(results, paths...) 
- r.emptyPathResponses[name] = results + // track paths that returned no results + if len(locs) == 0 && len(queriedPaths) > 0 { + r.emptyPathResponses[methodName] = append(r.emptyPathResponses[methodName], queriedPaths...) } } +// ===== file.Resolver Implementation ===== +// these methods delegate to the wrapped resolver while recording observations. + +// FilesByPath returns files matching the given paths. func (r *ObservingResolver) FilesByPath(paths ...string) ([]file.Location, error) { - name := "FilesByPath" - r.addPathQuery(name, paths...) + const methodName = "FilesByPath" + r.recordQuery(methodName, paths...) locs, err := r.decorated.FilesByPath(paths...) + r.recordResponses(methodName, locs, paths...) - r.addPathResponse(locs...) - r.addEmptyPathResponse(name, locs, paths...) return locs, err } +// FilesByGlob returns files matching the given glob patterns. func (r *ObservingResolver) FilesByGlob(patterns ...string) ([]file.Location, error) { - name := "FilesByGlob" - r.addPathQuery(name, patterns...) + const methodName = "FilesByGlob" + r.recordQuery(methodName, patterns...) locs, err := r.decorated.FilesByGlob(patterns...) + r.recordResponses(methodName, locs, patterns...) - r.addPathResponse(locs...) - r.addEmptyPathResponse(name, locs, patterns...) return locs, err } +// FilesByMIMEType returns files matching the given MIME types. func (r *ObservingResolver) FilesByMIMEType(types ...string) ([]file.Location, error) { - name := "FilesByMIMEType" - r.addPathQuery(name, types...) + const methodName = "FilesByMIMEType" + r.recordQuery(methodName, types...) locs, err := r.decorated.FilesByMIMEType(types...) + r.recordResponses(methodName, locs, types...) - r.addPathResponse(locs...) - r.addEmptyPathResponse(name, locs, types...) 
return locs, err } -func (r *ObservingResolver) RelativeFileByPath(l file.Location, path string) *file.Location { - name := "RelativeFileByPath" - r.addPathQuery(name, path) +// RelativeFileByPath returns a file at a path relative to the given location. +func (r *ObservingResolver) RelativeFileByPath(location file.Location, path string) *file.Location { + const methodName = "RelativeFileByPath" + r.recordQuery(methodName, path) - loc := r.decorated.RelativeFileByPath(l, path) + loc := r.decorated.RelativeFileByPath(location, path) if loc != nil { - r.addPathResponse(*loc) + r.pathResponses = append(r.pathResponses, *loc) } else { - results := r.emptyPathResponses[name] - results = append(results, path) - r.emptyPathResponses[name] = results + r.emptyPathResponses[methodName] = append(r.emptyPathResponses[methodName], path) } + return loc } -// For the content resolver methods... - +// FileContentsByLocation returns a reader for the contents of the file at the given location. func (r *ObservingResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { r.contentQueries = append(r.contentQueries, location) - reader, err := r.decorated.FileContentsByLocation(location) - return reader, err + return r.decorated.FileContentsByLocation(location) } -// For the remaining resolver methods... - +// AllLocations returns all file locations known to the resolver. func (r *ObservingResolver) AllLocations(ctx context.Context) <-chan file.Location { return r.decorated.AllLocations(ctx) } -func (r *ObservingResolver) HasPath(s string) bool { - return r.decorated.HasPath(s) +// HasPath returns true if the resolver knows about the given path. +func (r *ObservingResolver) HasPath(path string) bool { + return r.decorated.HasPath(path) } +// FileMetadataByLocation returns metadata for the file at the given location. 
func (r *ObservingResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) { return r.decorated.FileMetadataByLocation(location) } diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go index 493d44355..08e789fbb 100644 --- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go +++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go @@ -6,6 +6,8 @@ import ( "io" "os" "path/filepath" + "reflect" + "runtime" "sort" "strings" "sync" @@ -56,6 +58,7 @@ type CatalogTester struct { packageStringer func(pkg.Package) string customAssertions []func(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) context context.Context + skipTestObservations bool } func Context() context.Context { @@ -260,13 +263,23 @@ func (p *CatalogTester) IgnoreUnfulfilledPathResponses(paths ...string) *Catalog return p } +func (p *CatalogTester) WithoutTestObserver() *CatalogTester { + p.skipTestObservations = true + return p +} + func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) { t.Helper() pkgs, relationships, err := parser(p.context, p.resolver, p.env, p.reader) + // only test for errors if explicitly requested if p.wantErr != nil { p.wantErr(t, err) } + + // track metadata types for cataloger discovery + p.trackParserMetadata(t, parser, pkgs, relationships) + p.assertPkgs(t, pkgs, relationships) } @@ -292,6 +305,9 @@ func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) { p.wantErr(t, err) } + // track metadata types for cataloger discovery + p.trackCatalogerMetadata(t, cataloger, pkgs, relationships) + if p.assertResultExpectations { p.assertPkgs(t, pkgs, relationships) } @@ -458,3 +474,163 @@ func stringPackage(p pkg.Package) string { return fmt.Sprintf("%s @ %s (%s)", p.Name, p.Version, loc) } + +// getFunctionName extracts the function name from a function pointer using reflection +func getFunctionName(fn 
interface{}) string { + // get the function pointer + ptr := reflect.ValueOf(fn).Pointer() + + // get the function details + funcForPC := runtime.FuncForPC(ptr) + if funcForPC == nil { + return "" + } + + fullName := funcForPC.Name() + + // extract just the function name from the full path + // e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt" + // -> "parseRequirementsTxt" + parts := strings.Split(fullName, ".") + if len(parts) > 0 { + name := parts[len(parts)-1] + // strip the -fm suffix that Go's reflection adds for methods + // e.g., "parsePackageLock-fm" -> "parsePackageLock" + return strings.TrimSuffix(name, "-fm") + } + + return fullName +} + +// getCatalogerName extracts the cataloger name from the test context or cataloger name +func getCatalogerName(_ *testing.T, cataloger pkg.Cataloger) string { + // use the cataloger's name method if available + return cataloger.Name() +} + +// getPackagePath extracts the package path from a function name +// e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt" -> "python" +func getPackagePath(fn interface{}) string { + ptr := reflect.ValueOf(fn).Pointer() + funcForPC := runtime.FuncForPC(ptr) + if funcForPC == nil { + return "" + } + + fullName := funcForPC.Name() + + // extract package name from path + // e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt" + // -> "python" + if strings.Contains(fullName, "/cataloger/") { + parts := strings.Split(fullName, "/cataloger/") + if len(parts) > 1 { + // get the next segment after "/cataloger/" + remaining := parts[1] + // split by "." 
to get package name + pkgParts := strings.Split(remaining, ".") + if len(pkgParts) > 0 { + return pkgParts[0] + } + } + } + + return "" +} + +// getPackagePathFromCataloger extracts the package path from the caller's file path +// For generic catalogers, the cataloger type is from the generic package, but we need +// the package where the test is defined (e.g., rust, python, etc.) +func getPackagePathFromCataloger(_ pkg.Cataloger) string { + // walk up the call stack to find the test file + // we're looking for a file in the cataloger directory structure + for i := 0; i < 10; i++ { + _, file, _, ok := runtime.Caller(i) + if !ok { + break + } + + // extract package name from file path + // e.g., "/Users/.../syft/pkg/cataloger/rust/cataloger_test.go" -> "rust" + if strings.Contains(file, "/cataloger/") { + parts := strings.Split(file, "/cataloger/") + if len(parts) > 1 { + // get the next segment after "/cataloger/" + remaining := parts[1] + // split by "/" to get package name + pkgParts := strings.Split(remaining, "/") + if len(pkgParts) > 0 && pkgParts[0] != "internal" { + return pkgParts[0] + } + } + } + } + + return "" +} + +// trackParserMetadata records metadata types for a parser function +func (p *CatalogTester) trackParserMetadata(t *testing.T, parser generic.Parser, pkgs []pkg.Package, relationships []artifact.Relationship) { + if p.skipTestObservations { + return + } + + parserName := getFunctionName(parser) + if parserName == "" { + return + } + + // try to infer package name from function path + packageName := getPackagePath(parser) + if packageName == "" { + return + } + + tracker := getTracker() + + // old tracking (still used by metadata discovery) + for _, pkg := range pkgs { + tracker.RecordParserPackageMetadata(packageName, parserName, pkg) + } + + // new unified observations with capability tracking + tracker.RecordParserObservations(packageName, parserName, pkgs, relationships) + + // ensure results are written when tests complete + 
t.Cleanup(func() { + _ = WriteResultsIfEnabled() + }) +} + +// trackCatalogerMetadata records metadata types for a cataloger +func (p *CatalogTester) trackCatalogerMetadata(t *testing.T, cataloger pkg.Cataloger, pkgs []pkg.Package, relationships []artifact.Relationship) { + if p.skipTestObservations { + return + } + + catalogerName := getCatalogerName(t, cataloger) + if catalogerName == "" { + return + } + + // try to infer package name from cataloger type + packageName := getPackagePathFromCataloger(cataloger) + if packageName == "" { + return + } + + tracker := getTracker() + + // old tracking (still used by metadata discovery) + for _, pkg := range pkgs { + tracker.RecordCatalogerPackageMetadata(catalogerName, pkg) + } + + // new unified observations with capability tracking + tracker.RecordCatalogerObservations(packageName, catalogerName, pkgs, relationships) + + // ensure results are written when tests complete + t.Cleanup(func() { + _ = WriteResultsIfEnabled() + }) +} diff --git a/syft/pkg/cataloger/javascript/parse_yarn_lock_test.go b/syft/pkg/cataloger/javascript/parse_yarn_lock_test.go index f6719db4a..0cf6b9dba 100644 --- a/syft/pkg/cataloger/javascript/parse_yarn_lock_test.go +++ b/syft/pkg/cataloger/javascript/parse_yarn_lock_test.go @@ -284,7 +284,11 @@ func TestSearchYarnForLicenses(t *testing.T) { } tc.config.NPMBaseURL = url adapter := newGenericYarnLockAdapter(tc.config) - pkgtest.TestFileParser(t, fixture, adapter.parseYarnLock, tc.expectedPackages, nil) + pkgtest.NewCatalogTester(). + FromFile(t, fixture). + Expects(tc.expectedPackages, nil). + WithoutTestObserver(). 
// this is an online test, thus not the default configuration + TestParser(t, adapter.parseYarnLock) }) } } diff --git a/syft/pkg/type.go b/syft/pkg/type.go index 7dea22586..6ac815f0e 100644 --- a/syft/pkg/type.go +++ b/syft/pkg/type.go @@ -7,6 +7,10 @@ import ( // Type represents a Package Type for or within a language ecosystem (there may be multiple package types within a language ecosystem) type Type string +func (t Type) String() string { + return string(t) +} + const ( // the full set of supported packages UnknownPkg Type = "UnknownPackage"