describe cataloger capabilities via test observations (#4318)

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Alex Goodman 2025-10-30 13:19:42 -04:00 committed by GitHub
parent 5db3a9bf55
commit 538430d65d
7 changed files with 854 additions and 85 deletions

View File

@ -0,0 +1,46 @@
package pkgtestobservation
import "time"
// Observations represents capability observations during testing
type Observations struct {
License bool `json:"license"`
Relationships Relationship `json:"relationships"`
FileListing Count `json:"file_listing"`
FileDigests Count `json:"file_digests"`
IntegrityHash Count `json:"integrity_hash"`
}
// Relationship tracks dependency relationship observations
type Relationship struct {
Found bool `json:"found"`
Count int `json:"count"`
}
// Count tracks whether a capability was found and how many times
type Count struct {
Found bool `json:"found"`
Count int `json:"count"`
}
// Test is the root structure for test-observations.json
type Test struct {
Package string `json:"package"`
UpdatedAt time.Time `json:"updated_at"`
Catalogers map[string]*Cataloger `json:"catalogers"`
Parsers map[string]*Parser `json:"parsers"`
}
// Parser captures all observations for a parser
type Parser struct {
MetadataTypes []string `json:"metadata_types"`
PackageTypes []string `json:"package_types"`
Observations Observations `json:"observations"`
}
// Cataloger captures all observations for a cataloger
type Cataloger struct {
MetadataTypes []string `json:"metadata_types"`
PackageTypes []string `json:"package_types"`
Observations Observations `json:"observations"`
}
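
For illustration, here is a rough sketch of how these types serialize, assuming the code runs inside the syft module (the cataloger name, metadata/package types, and counts below are hypothetical values, not real observations):

package main

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/anchore/syft/internal/capabilities/pkgtestobservation"
)

func main() {
	// hypothetical observations for a single cataloger; all values are illustrative
	obs := pkgtestobservation.Test{
		Package:   "python",
		UpdatedAt: time.Date(2025, 10, 30, 0, 0, 0, 0, time.UTC),
		Catalogers: map[string]*pkgtestobservation.Cataloger{
			"python-package-cataloger": {
				MetadataTypes: []string{"pkg.PythonPackage"},
				PackageTypes:  []string{"python"},
				Observations: pkgtestobservation.Observations{
					License:       true,
					Relationships: pkgtestobservation.Relationship{Found: true, Count: 3},
					FileListing:   pkgtestobservation.Count{Found: true, Count: 12},
				},
			},
		},
		Parsers: map[string]*pkgtestobservation.Parser{},
	}

	out, _ := json.MarshalIndent(obs, "", "  ")
	fmt.Println(string(out)) // roughly the shape written to test-fixtures/test-observations.json
}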

syft/pkg/cataloger/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# these are generated by pkgtest helpers, no need to check them in
**/test-fixtures/test-observations.json

View File

@ -0,0 +1,514 @@
// Package pkgtest provides test helpers for cataloger and parser testing,
// including automatic observation tracking for capability documentation.
package pkgtest
import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"sort"
"sync"
"time"
"github.com/anchore/syft/internal/capabilities/pkgtestobservation"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
)
var (
globalTracker *MetadataTracker
globalTrackerOnce sync.Once
// commonPackageIntegrityFields are common field names used to store integrity hashes in package metadata.
// TODO: this is a best-effort list and may need to be expanded as new package types are added. Don't depend on this list to catch everything - it's only for test validation.
commonPackageIntegrityFields = []string{
"Integrity", "Checksum", "H1Digest",
"OutputHash", "PkgHash", "ContentHash",
"PkgHashExt", "Hash", "IntegrityHash",
}
)
// MetadataTracker collects metadata type and package type usage during test execution
type MetadataTracker struct {
mu sync.Mutex
parserData map[string]map[string]map[string]bool // package -> parser -> metadata types (set)
catalogerData map[string]map[string]bool // cataloger -> metadata types (set)
parserPackageTypes map[string]map[string]map[string]bool // package -> parser -> package types (set)
catalogerPackageTypes map[string]map[string]bool // cataloger -> package types (set)
// unified observations for the current test package
observations *pkgtestobservation.Test
}
// getTracker returns the singleton metadata tracker
func getTracker() *MetadataTracker {
globalTrackerOnce.Do(func() {
globalTracker = &MetadataTracker{
parserData: make(map[string]map[string]map[string]bool),
catalogerData: make(map[string]map[string]bool),
parserPackageTypes: make(map[string]map[string]map[string]bool),
catalogerPackageTypes: make(map[string]map[string]bool),
}
})
return globalTracker
}
// RecordParser records a metadata type usage for a parser function
func (t *MetadataTracker) RecordParser(packageName, parserFunction, metadataType string) {
if packageName == "" || parserFunction == "" || metadataType == "" {
return
}
// filter out non-metadata types
if metadataType == "pkg.Package" || metadataType == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
if t.parserData[packageName] == nil {
t.parserData[packageName] = make(map[string]map[string]bool)
}
if t.parserData[packageName][parserFunction] == nil {
t.parserData[packageName][parserFunction] = make(map[string]bool)
}
t.parserData[packageName][parserFunction][metadataType] = true
}
// RecordCataloger records a metadata type usage for a cataloger
func (t *MetadataTracker) RecordCataloger(catalogerName, metadataType string) {
if catalogerName == "" || metadataType == "" {
return
}
// filter out non-metadata types
if metadataType == "pkg.Package" || metadataType == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
if t.catalogerData[catalogerName] == nil {
t.catalogerData[catalogerName] = make(map[string]bool)
}
t.catalogerData[catalogerName][metadataType] = true
}
// RecordParserPackageType records a package type usage for a parser function
func (t *MetadataTracker) RecordParserPackageType(packageName, parserFunction, pkgType string) {
if packageName == "" || parserFunction == "" || pkgType == "" {
return
}
// filter out unknown types
if pkgType == pkg.UnknownPkg.String() || pkgType == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
if t.parserPackageTypes[packageName] == nil {
t.parserPackageTypes[packageName] = make(map[string]map[string]bool)
}
if t.parserPackageTypes[packageName][parserFunction] == nil {
t.parserPackageTypes[packageName][parserFunction] = make(map[string]bool)
}
t.parserPackageTypes[packageName][parserFunction][pkgType] = true
}
// RecordCatalogerPackageType records a package type usage for a cataloger
func (t *MetadataTracker) RecordCatalogerPackageType(catalogerName, pkgType string) {
if catalogerName == "" || pkgType == "" {
return
}
// filter out unknown types
if pkgType == pkg.UnknownPkg.String() || pkgType == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
if t.catalogerPackageTypes[catalogerName] == nil {
t.catalogerPackageTypes[catalogerName] = make(map[string]bool)
}
t.catalogerPackageTypes[catalogerName][pkgType] = true
}
// RecordParserPackageMetadata extracts and records metadata type and package type from a package for a parser
func (t *MetadataTracker) RecordParserPackageMetadata(packageName, parserFunction string, p pkg.Package) {
if p.Metadata != nil {
metadataType := getMetadataTypeName(p.Metadata)
if metadataType != "" {
t.RecordParser(packageName, parserFunction, metadataType)
}
}
// record package type
t.RecordParserPackageType(packageName, parserFunction, string(p.Type))
}
// RecordCatalogerPackageMetadata extracts and records metadata type and package type from a package for a cataloger
func (t *MetadataTracker) RecordCatalogerPackageMetadata(catalogerName string, p pkg.Package) {
if p.Metadata != nil {
metadataType := getMetadataTypeName(p.Metadata)
if metadataType != "" {
t.RecordCataloger(catalogerName, metadataType)
}
}
// record package type
t.RecordCatalogerPackageType(catalogerName, string(p.Type))
}
// aggregateObservations aggregates package and relationship observations into metadata types, package types, and observations.
// this is used by both parser and cataloger observation recording.
func aggregateObservations(
metadataTypes *[]string,
packageTypes *[]string,
obs *pkgtestobservation.Observations,
pkgs []pkg.Package,
relationships []artifact.Relationship,
) {
// aggregate observations from packages
for _, p := range pkgs {
// metadata types
if p.Metadata != nil {
metadataType := getMetadataTypeName(p.Metadata)
if metadataType != "" && !contains(*metadataTypes, metadataType) {
*metadataTypes = append(*metadataTypes, metadataType)
}
}
// package types
pkgType := string(p.Type)
if pkgType != "" && pkgType != pkg.UnknownPkg.String() && !contains(*packageTypes, pkgType) {
*packageTypes = append(*packageTypes, pkgType)
}
// license observation
if !p.Licenses.Empty() {
obs.License = true
}
// file listing observation
if fileOwner, ok := p.Metadata.(pkg.FileOwner); ok {
files := fileOwner.OwnedFiles()
if len(files) > 0 {
obs.FileListing.Found = true
obs.FileListing.Count += len(files)
}
}
// file digests observation
if hasFileDigests(p.Metadata) {
obs.FileDigests.Found = true
obs.FileDigests.Count++
}
// integrity hash observation
if hasIntegrityHash(p.Metadata) {
obs.IntegrityHash.Found = true
obs.IntegrityHash.Count++
}
}
// relationship observations
depCount := countDependencyRelationships(relationships)
if depCount > 0 {
obs.Relationships.Found = true
obs.Relationships.Count = depCount
}
// sort arrays for consistency
sort.Strings(*metadataTypes)
sort.Strings(*packageTypes)
}
// ensureObservationsInitialized ensures t.observations is initialized and package name is set.
// must be called with t.mu locked.
func (t *MetadataTracker) ensureObservationsInitialized(packageName string) {
if t.observations == nil {
t.observations = &pkgtestobservation.Test{
Package: packageName,
Catalogers: make(map[string]*pkgtestobservation.Cataloger),
Parsers: make(map[string]*pkgtestobservation.Parser),
}
return
}
// update package name if not set (for the first test) or if it matches (for subsequent tests in same package)
if t.observations.Package == "" || t.observations.Package == packageName {
t.observations.Package = packageName
}
}
// getOrCreateParser gets an existing parser observation or creates a new one.
// must be called with t.mu locked.
func (t *MetadataTracker) getOrCreateParser(parserFunction string) *pkgtestobservation.Parser {
if t.observations.Parsers[parserFunction] == nil {
t.observations.Parsers[parserFunction] = &pkgtestobservation.Parser{
MetadataTypes: []string{},
PackageTypes: []string{},
Observations: pkgtestobservation.Observations{},
}
}
return t.observations.Parsers[parserFunction]
}
// getOrCreateCataloger gets an existing cataloger observation or creates a new one.
// must be called with t.mu locked.
func (t *MetadataTracker) getOrCreateCataloger(catalogerName string) *pkgtestobservation.Cataloger {
if t.observations.Catalogers[catalogerName] == nil {
t.observations.Catalogers[catalogerName] = &pkgtestobservation.Cataloger{
MetadataTypes: []string{},
PackageTypes: []string{},
Observations: pkgtestobservation.Observations{},
}
}
return t.observations.Catalogers[catalogerName]
}
// RecordParserObservations records comprehensive observations for a parser.
func (t *MetadataTracker) RecordParserObservations(
packageName, parserFunction string,
pkgs []pkg.Package,
relationships []artifact.Relationship,
) {
if packageName == "" || parserFunction == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
t.ensureObservationsInitialized(packageName)
parser := t.getOrCreateParser(parserFunction)
aggregateObservations(&parser.MetadataTypes, &parser.PackageTypes, &parser.Observations, pkgs, relationships)
}
// RecordCatalogerObservations records comprehensive observations for a cataloger.
func (t *MetadataTracker) RecordCatalogerObservations(
packageName, catalogerName string,
pkgs []pkg.Package,
relationships []artifact.Relationship,
) {
if packageName == "" || catalogerName == "" {
return
}
t.mu.Lock()
defer t.mu.Unlock()
t.ensureObservationsInitialized(packageName)
cataloger := t.getOrCreateCataloger(catalogerName)
aggregateObservations(&cataloger.MetadataTypes, &cataloger.PackageTypes, &cataloger.Observations, pkgs, relationships)
}
// ===== Metadata Type and Capability Detection =====
// These functions use reflection to inspect package metadata and detect capabilities.
// They are best-effort and may not catch all cases.
// getMetadataTypeName returns the fully qualified type name of metadata (e.g., "pkg.ApkDBEntry").
// extracts just the last package path segment to keep names concise.
func getMetadataTypeName(metadata interface{}) string {
if metadata == nil {
return ""
}
t := reflect.TypeOf(metadata)
if t == nil {
return ""
}
// handle pointers
if t.Kind() == reflect.Ptr {
t = t.Elem()
}
// return pkg path + type name (e.g., "pkg.ApkDBEntry")
if t.PkgPath() != "" {
// extract just "pkg" from "github.com/anchore/syft/syft/pkg"
pkgPath := lastPathSegment(t.PkgPath())
return pkgPath + "." + t.Name()
}
return t.Name()
}
// lastPathSegment extracts the last segment from a package path.
// for example: "github.com/anchore/syft/syft/pkg" -> "pkg"
func lastPathSegment(path string) string {
for i := len(path) - 1; i >= 0; i-- {
if path[i] == '/' {
return path[i+1:]
}
}
return path
}
// hasIntegrityHash checks if metadata contains an integrity hash field.
// note: this uses a best-effort approach checking common field names.
// DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only.
func hasIntegrityHash(metadata interface{}) bool {
v := dereferenceToStruct(metadata)
if !v.IsValid() || v.Kind() != reflect.Struct {
return false
}
for _, fieldName := range commonPackageIntegrityFields {
if hasPopulatedStringField(v, fieldName) {
return true
}
}
return false
}
// hasFileDigests checks if metadata contains file records with digests.
// note: uses a best-effort approach for detection.
// DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only.
func hasFileDigests(metadata interface{}) bool {
v := dereferenceToStruct(metadata)
if !v.IsValid() || v.Kind() != reflect.Struct {
return false
}
filesField := v.FieldByName("Files")
if !filesField.IsValid() || filesField.Kind() != reflect.Slice {
return false
}
// check if any file record has a Digest field populated
for i := 0; i < filesField.Len(); i++ {
if hasPopulatedDigest(filesField.Index(i)) {
return true
}
}
return false
}
// dereferenceToStruct handles pointer dereferencing and returns the underlying value.
// returns an invalid value if the input is nil or not convertible to a struct.
func dereferenceToStruct(v interface{}) reflect.Value {
if v == nil {
return reflect.Value{}
}
val := reflect.ValueOf(v)
if val.Kind() == reflect.Ptr {
if val.IsNil() {
return reflect.Value{}
}
val = val.Elem()
}
return val
}
// hasPopulatedStringField checks if a struct has a non-empty string field with the given name.
func hasPopulatedStringField(v reflect.Value, fieldName string) bool {
field := v.FieldByName(fieldName)
return field.IsValid() && field.Kind() == reflect.String && field.String() != ""
}
// hasPopulatedDigest checks if a file record has a populated Digest field.
func hasPopulatedDigest(fileRecord reflect.Value) bool {
fileRecord = dereferenceToStruct(fileRecord.Interface())
if !fileRecord.IsValid() || fileRecord.Kind() != reflect.Struct {
return false
}
digestField := fileRecord.FieldByName("Digest")
if !digestField.IsValid() {
return false
}
// check if digest is a pointer and not nil, or a non-zero value
switch digestField.Kind() {
case reflect.Ptr:
return !digestField.IsNil()
case reflect.String:
return digestField.String() != ""
case reflect.Struct:
return !digestField.IsZero()
}
return false
}
// ===== Utility Functions =====
// countDependencyRelationships counts the number of dependency relationships.
func countDependencyRelationships(relationships []artifact.Relationship) int {
count := 0
for _, rel := range relationships {
if rel.Type == artifact.DependencyOfRelationship {
count++
}
}
return count
}
// contains checks if a string slice contains a specific string.
func contains(slice []string, item string) bool {
for _, s := range slice {
if s == item {
return true
}
}
return false
}
// ===== Result Writing =====
// WriteResults writes the collected observation data to test-fixtures/test-observations.json.
func (t *MetadataTracker) WriteResults() error {
t.mu.Lock()
defer t.mu.Unlock()
if t.observations == nil {
// no data to write
return nil
}
// create output directory
outDir := "test-fixtures"
if err := os.MkdirAll(outDir, 0755); err != nil {
return err
}
// write unified test-observations.json
t.observations.UpdatedAt = time.Now().UTC()
filename := filepath.Join(outDir, "test-observations.json")
return writeJSONFile(filename, t.observations)
}
// writeJSONFile writes data as pretty-printed JSON to the specified path.
func writeJSONFile(path string, data interface{}) error {
file, err := os.Create(path)
if err != nil {
return err
}
defer file.Close()
encoder := json.NewEncoder(file)
encoder.SetIndent("", " ")
return encoder.Encode(data)
}
// WriteResultsIfEnabled writes results if tracking is enabled.
// this is typically called via t.Cleanup() in tests.
func WriteResultsIfEnabled() error {
tracker := getTracker()
return tracker.WriteResults()
}
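
As a rough usage sketch (assuming it lives in the pkgtest package itself; the package and parser names are hypothetical and the slices are left empty for brevity), a test would record observations and register the flush like this:

// sketchTrackerFlow is illustrative only; real tests get this wiring from the
// CatalogTester helpers rather than calling the tracker directly.
func sketchTrackerFlow(t *testing.T) {
	tracker := getTracker()

	var pkgs []pkg.Package
	var relationships []artifact.Relationship

	// record observations for a hypothetical parser run
	tracker.RecordParserObservations("python", "parseRequirementsTxt", pkgs, relationships)

	// flush test-fixtures/test-observations.json once the test completes
	t.Cleanup(func() {
		_ = WriteResultsIfEnabled()
	})
}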

View File

@ -1,3 +1,5 @@
// Package pkgtest provides test helpers for cataloger and parser testing,
// including resolver decorators that track file access patterns.
package pkgtest
import (
@ -13,29 +15,36 @@ import (
var _ file.Resolver = (*ObservingResolver)(nil)
// ObservingResolver wraps a file.Resolver to observe and track all file access patterns.
// it records what paths were queried, what was returned, and what file contents were read.
// this is useful for validating that catalogers use appropriate glob patterns and don't over-read files.
type ObservingResolver struct {
decorated file.Resolver
pathQueries map[string][]string // method name -> list of query patterns
pathResponses []file.Location // all locations successfully returned
contentQueries []file.Location // all locations whose content was read
emptyPathResponses map[string][]string // method name -> paths that returned empty results
}
// NewObservingResolver creates a new ObservingResolver that wraps the given resolver.
func NewObservingResolver(resolver file.Resolver) *ObservingResolver {
return &ObservingResolver{
decorated: resolver,
pathQueries: make(map[string][]string),
pathResponses: make([]file.Location, 0),
contentQueries: make([]file.Location, 0),
emptyPathResponses: make(map[string][]string),
}
}
// ===== Test Assertion Helpers =====
// these methods are used by tests to validate expected file access patterns.
// ObservedPathQuery checks if a specific path pattern was queried.
func (r *ObservingResolver) ObservedPathQuery(input string) bool {
for _, queries := range r.pathQueries {
for _, query := range queries {
if query == input {
return true
}
}
@ -43,6 +52,7 @@ func (r *ObservingResolver) ObservedPathQuery(input string) bool {
return false
}
// ObservedPathResponses checks if a specific path was returned in any response.
func (r *ObservingResolver) ObservedPathResponses(path string) bool {
for _, loc := range r.pathResponses {
if loc.RealPath == path {
@ -52,6 +62,7 @@ func (r *ObservingResolver) ObservedPathResponses(path string) bool {
return false
}
// ObservedContentQueries checks if a specific file's content was read.
func (r *ObservingResolver) ObservedContentQueries(path string) bool {
for _, loc := range r.contentQueries {
if loc.RealPath == path {
@ -61,6 +72,7 @@ func (r *ObservingResolver) ObservedContentQueries(path string) bool {
return false
}
// AllContentQueries returns a deduplicated list of all file paths whose content was read.
func (r *ObservingResolver) AllContentQueries() []string {
observed := strset.New()
for _, loc := range r.contentQueries {
@ -69,155 +81,166 @@ func (r *ObservingResolver) AllContentQueries() []string {
return observed.List()
}
// AllPathQueries returns all path query patterns grouped by method name.
func (r *ObservingResolver) AllPathQueries() map[string][]string {
return r.pathQueries
}
// PruneUnfulfilledPathResponses removes specified paths from the unfulfilled requests tracking.
// ignore maps method names to paths that should be ignored for that method.
// ignorePaths lists paths that should be ignored for all methods.
func (r *ObservingResolver) PruneUnfulfilledPathResponses(ignore map[string][]string, ignorePaths ...string) {
// remove paths ignored for specific methods
for methodName, pathsToIgnore := range ignore {
r.emptyPathResponses[methodName] = removeStrings(r.emptyPathResponses[methodName], pathsToIgnore)
if len(r.emptyPathResponses[methodName]) == 0 {
delete(r.emptyPathResponses, methodName)
}
}
// remove paths ignored for all methods
if len(ignorePaths) > 0 {
for methodName := range r.emptyPathResponses {
r.emptyPathResponses[methodName] = removeStrings(r.emptyPathResponses[methodName], ignorePaths)
if len(r.emptyPathResponses[methodName]) == 0 {
delete(r.emptyPathResponses, methodName)
}
}
}
}
// HasUnfulfilledPathRequests returns true if there are any paths that were queried but returned empty.
func (r *ObservingResolver) HasUnfulfilledPathRequests() bool {
return len(r.emptyPathResponses) > 0
}
// PrettyUnfulfilledPathRequests returns a formatted string of all unfulfilled path requests.
func (r *ObservingResolver) PrettyUnfulfilledPathRequests() string {
if len(r.emptyPathResponses) == 0 {
return ""
}
var keys []string
for k := range r.emptyPathResponses {
keys = append(keys, k)
}
sort.Strings(keys)
var result string
for _, k := range keys {
result += fmt.Sprintf("   %s: %+v\n", k, r.emptyPathResponses[k])
}
return result
}
// removeStrings removes all occurrences of toRemove from slice.
func removeStrings(slice []string, toRemove []string) []string {
if len(toRemove) == 0 {
return slice
}
// create a set for O(1) lookup
removeSet := make(map[string]bool)
for _, s := range toRemove {
removeSet[s] = true
}
// filter the slice
result := make([]string, 0, len(slice))
for _, s := range slice {
if !removeSet[s] {
result = append(result, s)
}
}
return result
}
// ===== Internal Tracking Helpers =====
// recordQuery records a path query for a given method.
func (r *ObservingResolver) recordQuery(methodName string, queries ...string) {
r.pathQueries[methodName] = append(r.pathQueries[methodName], queries...)
}
// recordResponses records successful path responses and tracks any unfulfilled queries.
func (r *ObservingResolver) recordResponses(methodName string, locs []file.Location, queriedPaths ...string) {
r.pathResponses = append(r.pathResponses, locs...)
// track paths that returned no results
if len(locs) == 0 && len(queriedPaths) > 0 {
r.emptyPathResponses[methodName] = append(r.emptyPathResponses[methodName], queriedPaths...)
}
}
// ===== file.Resolver Implementation =====
// these methods delegate to the wrapped resolver while recording observations.
// FilesByPath returns files matching the given paths.
func (r *ObservingResolver) FilesByPath(paths ...string) ([]file.Location, error) {
const methodName = "FilesByPath"
r.recordQuery(methodName, paths...)
locs, err := r.decorated.FilesByPath(paths...)
r.recordResponses(methodName, locs, paths...)
return locs, err
}
// FilesByGlob returns files matching the given glob patterns.
func (r *ObservingResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
const methodName = "FilesByGlob"
r.recordQuery(methodName, patterns...)
locs, err := r.decorated.FilesByGlob(patterns...)
r.recordResponses(methodName, locs, patterns...)
return locs, err
}
// FilesByMIMEType returns files matching the given MIME types.
func (r *ObservingResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
const methodName = "FilesByMIMEType"
r.recordQuery(methodName, types...)
locs, err := r.decorated.FilesByMIMEType(types...)
r.recordResponses(methodName, locs, types...)
return locs, err
}
// RelativeFileByPath returns a file at a path relative to the given location.
func (r *ObservingResolver) RelativeFileByPath(location file.Location, path string) *file.Location {
const methodName = "RelativeFileByPath"
r.recordQuery(methodName, path)
loc := r.decorated.RelativeFileByPath(location, path)
if loc != nil {
r.pathResponses = append(r.pathResponses, *loc)
} else {
r.emptyPathResponses[methodName] = append(r.emptyPathResponses[methodName], path)
}
return loc
}
// FileContentsByLocation returns a reader for the contents of the file at the given location.
func (r *ObservingResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
r.contentQueries = append(r.contentQueries, location)
return r.decorated.FileContentsByLocation(location)
}
// AllLocations returns all file locations known to the resolver.
func (r *ObservingResolver) AllLocations(ctx context.Context) <-chan file.Location {
return r.decorated.AllLocations(ctx)
}
// HasPath returns true if the resolver knows about the given path.
func (r *ObservingResolver) HasPath(path string) bool {
return r.decorated.HasPath(path)
}
// FileMetadataByLocation returns metadata for the file at the given location.
func (r *ObservingResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
return r.decorated.FileMetadataByLocation(location)
}

View File

@ -6,6 +6,8 @@ import (
"io"
"os"
"path/filepath"
"reflect"
"runtime"
"sort"
"strings"
"sync"
@ -56,6 +58,7 @@ type CatalogTester struct {
packageStringer func(pkg.Package) string
customAssertions []func(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship)
context context.Context
skipTestObservations bool
}
func Context() context.Context {
@ -260,13 +263,23 @@ func (p *CatalogTester) IgnoreUnfulfilledPathResponses(paths ...string) *Catalog
return p
}
func (p *CatalogTester) WithoutTestObserver() *CatalogTester {
p.skipTestObservations = true
return p
}
func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) {
t.Helper()
pkgs, relationships, err := parser(p.context, p.resolver, p.env, p.reader)
// only test for errors if explicitly requested
if p.wantErr != nil {
p.wantErr(t, err)
}
// track metadata types for cataloger discovery
p.trackParserMetadata(t, parser, pkgs, relationships)
p.assertPkgs(t, pkgs, relationships)
}
@ -292,6 +305,9 @@ func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) {
p.wantErr(t, err)
}
// track metadata types for cataloger discovery
p.trackCatalogerMetadata(t, cataloger, pkgs, relationships)
if p.assertResultExpectations {
p.assertPkgs(t, pkgs, relationships)
}
@ -458,3 +474,163 @@ func stringPackage(p pkg.Package) string {
return fmt.Sprintf("%s @ %s (%s)", p.Name, p.Version, loc)
}
// getFunctionName extracts the function name from a function pointer using reflection
func getFunctionName(fn interface{}) string {
// get the function pointer
ptr := reflect.ValueOf(fn).Pointer()
// get the function details
funcForPC := runtime.FuncForPC(ptr)
if funcForPC == nil {
return ""
}
fullName := funcForPC.Name()
// extract just the function name from the full path
// e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt"
// -> "parseRequirementsTxt"
parts := strings.Split(fullName, ".")
if len(parts) > 0 {
name := parts[len(parts)-1]
// strip the -fm suffix that Go's reflection adds for methods
// e.g., "parsePackageLock-fm" -> "parsePackageLock"
return strings.TrimSuffix(name, "-fm")
}
return fullName
}
// getCatalogerName returns the name reported by the cataloger itself
func getCatalogerName(_ *testing.T, cataloger pkg.Cataloger) string {
// use the cataloger's name method if available
return cataloger.Name()
}
// getPackagePath extracts the package path from a function name
// e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt" -> "python"
func getPackagePath(fn interface{}) string {
ptr := reflect.ValueOf(fn).Pointer()
funcForPC := runtime.FuncForPC(ptr)
if funcForPC == nil {
return ""
}
fullName := funcForPC.Name()
// extract package name from path
// e.g., "github.com/anchore/syft/syft/pkg/cataloger/python.parseRequirementsTxt"
// -> "python"
if strings.Contains(fullName, "/cataloger/") {
parts := strings.Split(fullName, "/cataloger/")
if len(parts) > 1 {
// get the next segment after "/cataloger/"
remaining := parts[1]
// split by "." to get package name
pkgParts := strings.Split(remaining, ".")
if len(pkgParts) > 0 {
return pkgParts[0]
}
}
}
return ""
}
// getPackagePathFromCataloger extracts the package path from the caller's file path
// For generic catalogers, the cataloger type is from the generic package, but we need
// the package where the test is defined (e.g., rust, python, etc.)
func getPackagePathFromCataloger(_ pkg.Cataloger) string {
// walk up the call stack to find the test file
// we're looking for a file in the cataloger directory structure
for i := 0; i < 10; i++ {
_, file, _, ok := runtime.Caller(i)
if !ok {
break
}
// extract package name from file path
// e.g., "/Users/.../syft/pkg/cataloger/rust/cataloger_test.go" -> "rust"
if strings.Contains(file, "/cataloger/") {
parts := strings.Split(file, "/cataloger/")
if len(parts) > 1 {
// get the next segment after "/cataloger/"
remaining := parts[1]
// split by "/" to get package name
pkgParts := strings.Split(remaining, "/")
if len(pkgParts) > 0 && pkgParts[0] != "internal" {
return pkgParts[0]
}
}
}
}
return ""
}
// trackParserMetadata records metadata types for a parser function
func (p *CatalogTester) trackParserMetadata(t *testing.T, parser generic.Parser, pkgs []pkg.Package, relationships []artifact.Relationship) {
if p.skipTestObservations {
return
}
parserName := getFunctionName(parser)
if parserName == "" {
return
}
// try to infer package name from function path
packageName := getPackagePath(parser)
if packageName == "" {
return
}
tracker := getTracker()
// old tracking (still used by metadata discovery)
for _, pkg := range pkgs {
tracker.RecordParserPackageMetadata(packageName, parserName, pkg)
}
// new unified observations with capability tracking
tracker.RecordParserObservations(packageName, parserName, pkgs, relationships)
// ensure results are written when tests complete
t.Cleanup(func() {
_ = WriteResultsIfEnabled()
})
}
// trackCatalogerMetadata records metadata types for a cataloger
func (p *CatalogTester) trackCatalogerMetadata(t *testing.T, cataloger pkg.Cataloger, pkgs []pkg.Package, relationships []artifact.Relationship) {
if p.skipTestObservations {
return
}
catalogerName := getCatalogerName(t, cataloger)
if catalogerName == "" {
return
}
// try to infer package name from cataloger type
packageName := getPackagePathFromCataloger(cataloger)
if packageName == "" {
return
}
tracker := getTracker()
// old tracking (still used by metadata discovery)
for _, pkg := range pkgs {
tracker.RecordCatalogerPackageMetadata(catalogerName, pkg)
}
// new unified observations with capability tracking
tracker.RecordCatalogerObservations(packageName, catalogerName, pkgs, relationships)
// ensure results are written when tests complete
t.Cleanup(func() {
_ = WriteResultsIfEnabled()
})
}
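
For contrast with the opt-out shown in the next file, a minimal sketch of a cataloger test that keeps the default observation tracking (the cataloger constructor, fixture path, and empty expectations are hypothetical):

func TestExampleCataloger(t *testing.T) {
	var expectedPkgs []pkg.Package
	var expectedRelationships []artifact.Relationship

	// observation tracking stays enabled by default, so running this test
	// refreshes test-fixtures/test-observations.json on cleanup.
	pkgtest.NewCatalogTester().
		FromDirectory(t, "test-fixtures/example").
		Expects(expectedPkgs, expectedRelationships).
		TestCataloger(t, NewExampleCataloger())
}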

View File

@ -284,7 +284,11 @@ func TestSearchYarnForLicenses(t *testing.T) {
}
tc.config.NPMBaseURL = url
adapter := newGenericYarnLockAdapter(tc.config)
pkgtest.NewCatalogTester().
FromFile(t, fixture).
Expects(tc.expectedPackages, nil).
WithoutTestObserver(). // this is an online test, thus not the default configuration
TestParser(t, adapter.parseYarnLock)
})
}
}

View File

@ -7,6 +7,10 @@ import (
// Type represents a Package Type for or within a language ecosystem (there may be multiple package types within a language ecosystem)
type Type string
func (t Type) String() string {
return string(t)
}
const (
// the full set of supported packages
UnknownPkg Type = "UnknownPackage"