Alex Goodman 63832e5e5a expose json schema types
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
2025-10-24 09:21:07 -04:00

660 lines
17 KiB
Go

package main
import (
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/anchore/syft/internal/capabilities"
	"github.com/anchore/syft/syft/pkg/cataloger/binary"
)
// DiscoveredCataloger describes a single cataloger found by statically parsing
// the cataloger source code.
type DiscoveredCataloger struct {
	// Name is the name of the cataloger.
	Name string
	// Type is the kind of cataloger: "generic" or "custom".
	Type string
	// PackageName is extracted from the source file path, e.g., "python", "swift".
	PackageName string
	// SourceFile is the file in which the cataloger was discovered.
	SourceFile string
	// SourceFunction is the name of the function that constructs the cataloger.
	SourceFunction string
	// Parsers holds the discovered parsers; only for generic catalogers.
	Parsers []DiscoveredParser
}
// DiscoveredParser describes one parser function, together with its detection
// criteria, as discovered from source.
type DiscoveredParser struct {
	// ParserFunction is the name of the parser function.
	ParserFunction string
	// Method is the way artifacts are detected for this parser.
	Method capabilities.ArtifactDetectionMethod
	// Criteria holds the resolved detection values handed to the parser registration.
	Criteria []string
	// MetadataTypes is populated from cataloger-type-uses.json files.
	MetadataTypes []string
	// PackageTypes is populated from cataloger-type-uses.json files.
	PackageTypes []string
	// JSONSchemaTypes is populated from MetadataTypes.
	JSONSchemaTypes []string
}
// discoverGenericCatalogers scans every non-test Go file beneath
// <repoRoot>/syft/pkg/cataloger and collects the catalogers that are built
// with generic.NewCataloger().
//
// The result is keyed by cataloger name. An error is returned when the
// directory walk fails, when a file cannot be processed, or when the same
// cataloger name is discovered in two different files.
func discoverGenericCatalogers(repoRoot string) (map[string]DiscoveredCataloger, error) {
	root := filepath.Join(repoRoot, "syft", "pkg", "cataloger")

	// collect the candidate source files first; parsing happens after the walk
	var sources []string
	walkErr := filepath.Walk(root, func(p string, fi os.FileInfo, err error) error {
		switch {
		case err != nil:
			return err
		case fi.IsDir():
			return nil
		case !strings.HasSuffix(p, ".go"), strings.HasSuffix(p, "_test.go"):
			return nil
		}
		sources = append(sources, p)
		return nil
	})
	if walkErr != nil {
		return nil, fmt.Errorf("failed to walk cataloger directory: %w", walkErr)
	}

	all := make(map[string]DiscoveredCataloger)
	for _, src := range sources {
		found, err := discoverGenericCatalogersInFile(src, repoRoot)
		if err != nil {
			return nil, fmt.Errorf("failed to parse %s: %w", src, err)
		}
		for name, c := range found {
			// cataloger names must be unique across the whole tree
			if prev, dup := all[name]; dup {
				return nil, fmt.Errorf("duplicate cataloger name %q found in %s and %s", name, prev.SourceFile, c.SourceFile)
			}
			all[name] = c
		}
	}
	return all, nil
}
// discoverGenericCatalogersInFile parses a single Go source file and returns
// the generic catalogers constructed by its New*Cataloger functions, keyed by
// cataloger name.
//
// Only a failure to parse the file itself is reported as an error; functions
// that turn out not to build a generic cataloger are silently skipped.
func discoverGenericCatalogersInFile(path, repoRoot string) (map[string]DiscoveredCataloger, error) {
	parsed, err := parser.ParseFile(token.NewFileSet(), path, nil, parser.ParseComments)
	if err != nil {
		return nil, err
	}

	result := make(map[string]DiscoveredCataloger)
	for _, d := range parsed.Decls {
		fn, isFunc := d.(*ast.FuncDecl)
		if !isFunc {
			continue
		}

		// constructor candidates are named New...Cataloger
		fnName := fn.Name.Name
		if !(strings.HasPrefix(fnName, "New") && strings.HasSuffix(fnName, "Cataloger")) {
			continue
		}
		// ...and return a single Cataloger value
		if !returnsPackageCataloger(fn) {
			continue
		}

		// an error here means the function is not a generic cataloger, so skip it
		c, err := parseGenericCatalogerFunction(fn, path, repoRoot)
		if err != nil || c == nil {
			continue
		}
		result[c.Name] = *c
	}
	return result, nil
}
// returnsPackageCataloger reports whether funcDecl declares exactly one result
// field whose type is spelled either pkg.Cataloger or the unqualified
// Cataloger.
func returnsPackageCataloger(funcDecl *ast.FuncDecl) bool {
	results := funcDecl.Type.Results
	if results == nil || len(results.List) != 1 {
		return false
	}

	switch t := results.List[0].Type.(type) {
	case *ast.Ident:
		// just "Cataloger" without a package prefix
		return t.Name == "Cataloger"
	case *ast.SelectorExpr:
		// qualified form: only the "pkg" qualifier is accepted
		qualifier, ok := t.X.(*ast.Ident)
		return ok && qualifier.Name == "pkg" && t.Sel.Name == "Cataloger"
	default:
		return false
	}
}
// parseGenericCatalogerFunction inspects the body of a cataloger constructor
// and, if it contains a generic.NewCataloger(...) call, returns the discovered
// cataloger together with every WithParserBy* registration found in the body.
//
// The cataloger name is taken from the first argument of generic.NewCataloger,
// which may be a string literal (interpreted or raw) or an identifier naming a
// string constant declared in the same file. When several such calls occur the
// last one visited wins.
//
// An error is returned when the function has no body, when the file cannot be
// re-parsed, or when no cataloger name could be determined (i.e. the function
// does not build a generic cataloger).
func parseGenericCatalogerFunction(funcDecl *ast.FuncDecl, filePath, repoRoot string) (*DiscoveredCataloger, error) {
	if funcDecl.Body == nil {
		return nil, fmt.Errorf("function has no body")
	}

	// parse the file again to get imports and constants context
	fset := token.NewFileSet()
	f, err := parser.ParseFile(fset, filePath, nil, parser.ParseComments)
	if err != nil {
		return nil, err
	}

	ctx := &parseContext{
		file:     f,
		filePath: filePath,
		repoRoot: repoRoot,
	}

	var catalogerName string
	var parsers []DiscoveredParser

	// walk the function body to find generic.NewCataloger and WithParserBy* calls
	ast.Inspect(funcDecl.Body, func(n ast.Node) bool {
		callExpr, ok := n.(*ast.CallExpr)
		if !ok {
			return true
		}

		// the first argument of generic.NewCataloger carries the cataloger name
		if isGenericNewCatalogerCall(callExpr) && len(callExpr.Args) > 0 {
			switch arg := callExpr.Args[0].(type) {
			case *ast.BasicLit:
				if arg.Kind == token.STRING {
					// Unquote decodes both "interpreted" and `raw` literals, including
					// escape sequences, instead of merely stripping double quotes
					if name, unquoteErr := strconv.Unquote(arg.Value); unquoteErr == nil {
						catalogerName = name
					}
				}
			case *ast.Ident:
				// constant identifier declared in this file
				catalogerName = resolveLocalConstant(arg.Name, ctx)
			}
		}

		// named "discovered" rather than "parser" to avoid shadowing the go/parser package
		if discovered := parseWithParserByCall(callExpr, ctx); discovered != nil {
			parsers = append(parsers, *discovered)
		}
		return true
	})

	if catalogerName == "" {
		// not a generic cataloger
		return nil, fmt.Errorf("no generic.NewCataloger call found")
	}

	// make the file path relative to the repo root, falling back to the path as given
	relPath, err := filepath.Rel(repoRoot, filePath)
	if err != nil {
		relPath = filePath
	}

	return &DiscoveredCataloger{
		Name:           catalogerName,
		Type:           genericCatalogerType,
		PackageName:    extractPackageNameFromPath(relPath),
		SourceFile:     relPath,
		SourceFunction: funcDecl.Name.Name,
		Parsers:        parsers,
	}, nil
}
// extractPackageNameFromPath returns the path segment that immediately follows
// the first "cataloger" segment of filePath, or "" when there is no such
// segment.
// e.g., "syft/pkg/cataloger/swift/cataloger.go" -> "swift"
func extractPackageNameFromPath(filePath string) string {
	segments := strings.Split(filePath, string(filepath.Separator))
	// stop one short of the end: a trailing "cataloger" has no successor to return
	for idx := 0; idx < len(segments)-1; idx++ {
		if segments[idx] == "cataloger" {
			return segments[idx+1]
		}
	}
	return ""
}
// isGenericNewCatalogerCall reports whether callExpr is syntactically a call of
// the form generic.NewCataloger(...).
func isGenericNewCatalogerCall(callExpr *ast.CallExpr) bool {
	if sel, isSel := callExpr.Fun.(*ast.SelectorExpr); isSel {
		if qualifier, isIdent := sel.X.(*ast.Ident); isIdent {
			return qualifier.Name == "generic" && sel.Sel.Name == "NewCataloger"
		}
	}
	return false
}
// parseContext bundles the per-file state that is handed to the resolve*
// helpers while a source file is analysed.
type parseContext struct {
	// file is the parsed AST of the file under analysis; its imports and
	// declarations are consulted when resolving identifiers
	file *ast.File
	// filePath is the path the file was parsed from; repoRoot is the repository
	// root used to map module import paths onto directories
	filePath, repoRoot string
}
// parseWithParserByCall recognises a method call whose name starts with
// WithParserByGlobs, WithParserByPath or WithParserByMimeTypes and converts it
// into a DiscoveredParser.
//
// nil is returned when callExpr is not such a call, when it has fewer than two
// arguments, when its first argument is neither an identifier nor a selector,
// or when none of the remaining arguments could be resolved to a value.
func parseWithParserByCall(callExpr *ast.CallExpr, ctx *parseContext) *DiscoveredParser {
	sel, isSel := callExpr.Fun.(*ast.SelectorExpr)
	if !isSel {
		return nil
	}

	// the method name prefix decides the detection method
	var method capabilities.ArtifactDetectionMethod
	switch name := sel.Sel.Name; {
	case strings.HasPrefix(name, "WithParserByGlobs"):
		method = capabilities.GlobDetection
	case strings.HasPrefix(name, "WithParserByPath"):
		method = capabilities.PathDetection
	case strings.HasPrefix(name, "WithParserByMimeTypes"):
		method = capabilities.MIMETypeDetection
	default:
		return nil
	}

	args := callExpr.Args
	if len(args) < 2 {
		return nil
	}

	// first argument names the parser function: parseFunc or adapter.parseFunc
	var parserFunction string
	if ident, isIdent := args[0].(*ast.Ident); isIdent {
		parserFunction = ident.Name
	} else if qualified, isQualified := args[0].(*ast.SelectorExpr); isQualified {
		parserFunction = qualified.Sel.Name
	} else {
		return nil
	}

	// every remaining argument contributes zero or more detection criteria
	var criteria []string
	for _, criterion := range args[1:] {
		criteria = append(criteria, resolveCriteriaValues(criterion, ctx)...)
	}
	if len(criteria) == 0 {
		return nil
	}

	return &DiscoveredParser{
		ParserFunction: parserFunction,
		Method:         method,
		Criteria:       criteria,
	}
}
// resolveCriteriaValues resolves a criteria argument to its string value(s).
//
// Supported argument forms:
//   - string literal (interpreted or raw), decoded with strconv.Unquote
//   - identifier naming a string constant declared in the current file
//   - pkg.Name selector naming a string constant of an imported package
//   - call expression such as mimetype.ExecutableMIMETypeSet.List()
//
// nil is returned for any other form and for values that resolve to the empty
// string.
func resolveCriteriaValues(arg ast.Expr, ctx *parseContext) []string {
	switch expr := arg.(type) {
	case *ast.BasicLit:
		// direct string literal
		if expr.Kind != token.STRING {
			return nil
		}
		// Unquote yields the value the compiler sees: raw `...` literals lose their
		// backticks and escape sequences are decoded, which plain quote-trimming missed
		value, err := strconv.Unquote(expr.Value)
		if err != nil || value == "" {
			return nil
		}
		return []string{value}
	case *ast.Ident:
		// local constant reference
		if value := resolveLocalConstant(expr.Name, ctx); value != "" {
			return []string{value}
		}
	case *ast.SelectorExpr:
		// imported constant reference (e.g., pkg.ApkDBGlob)
		if pkgIdent, ok := expr.X.(*ast.Ident); ok {
			if value := resolveImportedConstant(pkgIdent.Name, expr.Sel.Name, ctx); value != "" {
				return []string{value}
			}
		}
	case *ast.CallExpr:
		// method call like mimetype.ExecutableMIMETypeSet.List()
		return resolveMethodCallValues(expr, ctx)
	}
	return nil
}
// resolveMethodCallValues resolves a .List() method call to the string values
// of the variable it is invoked on. The receiver may be package.Variable
// (looked up in the imported package) or a bare Variable (looked up in the
// current file). Any other call shape yields nil.
func resolveMethodCallValues(callExpr *ast.CallExpr, ctx *parseContext) []string {
	method, isSel := callExpr.Fun.(*ast.SelectorExpr)
	if !isSel || method.Sel.Name != "List" {
		return nil
	}

	switch receiver := method.X.(type) {
	case *ast.SelectorExpr:
		// format: package.Variable (e.g., mimetype.ExecutableMIMETypeSet)
		qualifier, isIdent := receiver.X.(*ast.Ident)
		if !isIdent || receiver.Sel.Name == "" {
			return nil
		}
		if qualifier.Name == "" {
			// no usable package qualifier: fall back to the current file
			return resolveLocalVariable(receiver.Sel.Name, ctx)
		}
		return resolveImportedVariable(qualifier.Name, receiver.Sel.Name, ctx)
	case *ast.Ident:
		// format: Variable (declared in the current file)
		if receiver.Name == "" {
			return nil
		}
		return resolveLocalVariable(receiver.Name, ctx)
	default:
		return nil
	}
}
// resolveImportedVariable finds varName in the package that the current file
// refers to as pkgName and extracts the string values of its initialiser.
//
// Import matching follows Go's scoping rules: an import with an explicit name
// (alias, "_" or ".") is only matched by that name, while an unnamed import is
// matched by the last element of its import path. Matching aliased imports by
// path as well would let `import foo "x/pkg"` capture a reference to `pkg.X`.
//
// nil is returned when no import matches, when the import path cannot be
// mapped to a directory, or when the variable is not found there.
func resolveImportedVariable(pkgName, varName string, ctx *parseContext) []string {
	// find the import path for this package
	var importPath string
	for _, imp := range ctx.file.Imports {
		path := strings.Trim(imp.Path.Value, `"`)
		if imp.Name != nil {
			// explicitly named import: reachable through that name only
			if imp.Name.Name == pkgName {
				importPath = path
				break
			}
			continue
		}
		// unnamed import: assume the package is named after the last path element
		parts := strings.Split(path, "/")
		if parts[len(parts)-1] == pkgName {
			importPath = path
			break
		}
	}
	if importPath == "" {
		return nil
	}

	// resolve import path to file system path
	pkgDir := resolveImportPath(importPath, ctx.repoRoot)
	if pkgDir == "" {
		return nil
	}

	// find the variable in the package and extract string slice
	return findVariableStringSlice(pkgDir, varName)
}
// resolveLocalVariable looks up varName among the declarations of the file held
// by ctx and returns the string values extracted from its initialiser.
func resolveLocalVariable(varName string, ctx *parseContext) []string {
	currentFile := ctx.file
	return extractStringSliceFromFile(currentFile, varName)
}
// findVariableStringSlice searches the non-test .go files directly inside
// pkgDir for a variable named varName and returns the first non-empty set of
// string values extracted for it. Files that fail to parse are skipped; nil is
// returned when nothing is found.
func findVariableStringSlice(pkgDir, varName string) []string {
	candidates, err := filepath.Glob(filepath.Join(pkgDir, "*.go"))
	if err != nil {
		return nil
	}

	for _, candidate := range candidates {
		if strings.HasSuffix(candidate, "_test.go") {
			continue
		}

		parsed, parseErr := parser.ParseFile(token.NewFileSet(), candidate, nil, 0)
		if parseErr != nil {
			// best effort: an unparsable file simply contributes nothing
			continue
		}

		values := extractStringSliceFromFile(parsed, varName)
		if len(values) == 0 {
			continue
		}
		return values
	}
	return nil
}
// extractStringSliceFromFile finds the first var declaration of varName that
// has an initialiser and returns the string values extracted from it, e.g. for
// var Foo = strset.New([]string{"a", "b", "c"}...)
// nil is returned when the file declares no such variable.
func extractStringSliceFromFile(file *ast.File, varName string) []string {
	for _, decl := range file.Decls {
		varDecl, isGen := decl.(*ast.GenDecl)
		if !isGen || varDecl.Tok != token.VAR {
			continue
		}

		for _, spec := range varDecl.Specs {
			vs, isValue := spec.(*ast.ValueSpec)
			if !isValue {
				continue
			}
			// names and values are positional; a name without an initialiser is ignored
			for idx := 0; idx < len(vs.Names) && idx < len(vs.Values); idx++ {
				if vs.Names[idx].Name == varName {
					return extractStringSliceFromExpr(vs.Values[idx])
				}
			}
		}
	}
	return nil
}
// extractStringSliceFromExpr extracts the string literals from an expression
// of the form
//
//	strset.New([]string{"a", "b"}...)
//
// i.e. a call whose first argument is a composite literal. Elements that are
// not string literals, and empty strings, are skipped. nil is returned when
// expr does not have that shape.
//
// Literal values are decoded with strconv.Unquote so that raw (backtick)
// strings and escape sequences produce the value the compiler would see rather
// than their source spelling.
func extractStringSliceFromExpr(expr ast.Expr) []string {
	// handle strset.New(...) calls
	callExpr, ok := expr.(*ast.CallExpr)
	if !ok || len(callExpr.Args) == 0 {
		return nil
	}

	// the first argument should be a []string{...} composite literal
	compositeLit, ok := callExpr.Args[0].(*ast.CompositeLit)
	if !ok {
		return nil
	}

	var values []string
	for _, elt := range compositeLit.Elts {
		lit, ok := elt.(*ast.BasicLit)
		if !ok || lit.Kind != token.STRING {
			continue
		}
		value, err := strconv.Unquote(lit.Value)
		if err != nil || value == "" {
			continue
		}
		values = append(values, value)
	}
	return values
}
// resolveLocalConstant finds the const named name in the current file and
// returns its value when the initialiser is a string literal. "" is returned
// when the constant is not declared in the file or is not initialised with a
// string literal.
//
// The literal is decoded with strconv.Unquote so that raw (backtick) strings
// and escape sequences produce the value the compiler would see rather than
// their source spelling.
func resolveLocalConstant(name string, ctx *parseContext) string {
	for _, decl := range ctx.file.Decls {
		genDecl, ok := decl.(*ast.GenDecl)
		if !ok || genDecl.Tok != token.CONST {
			continue
		}
		for _, spec := range genDecl.Specs {
			valueSpec, ok := spec.(*ast.ValueSpec)
			if !ok {
				continue
			}
			for i, ident := range valueSpec.Names {
				// names and values are positional; skip names without an initialiser
				if ident.Name != name || i >= len(valueSpec.Values) {
					continue
				}
				lit, ok := valueSpec.Values[i].(*ast.BasicLit)
				if !ok || lit.Kind != token.STRING {
					continue
				}
				if value, err := strconv.Unquote(lit.Value); err == nil {
					return value
				}
			}
		}
	}
	return ""
}
// resolveImportedConstant finds the string constant constName in the package
// that the current file refers to as pkgName.
//
// Import matching follows Go's scoping rules: an import with an explicit name
// (alias, "_" or ".") is only matched by that name, while an unnamed import is
// matched by the last element of its import path. Matching aliased imports by
// path as well would let `import foo "x/pkg"` capture a reference to `pkg.X`.
//
// "" is returned when no import matches, when the import path cannot be mapped
// to a directory, or when the constant is not found there.
func resolveImportedConstant(pkgName, constName string, ctx *parseContext) string {
	// find the import path for this package name
	var importPath string
	for _, imp := range ctx.file.Imports {
		path := strings.Trim(imp.Path.Value, `"`)
		if imp.Name != nil {
			// explicitly named import: reachable through that name only
			if imp.Name.Name == pkgName {
				importPath = path
				break
			}
			continue
		}
		// unnamed import: assume the package is named after the last path element
		parts := strings.Split(path, "/")
		if parts[len(parts)-1] == pkgName {
			importPath = path
			break
		}
	}
	if importPath == "" {
		return ""
	}

	// resolve import path to file system path
	pkgDir := resolveImportPath(importPath, ctx.repoRoot)
	if pkgDir == "" {
		return ""
	}

	// parse all .go files in the package directory to find the constant
	return findConstantInPackage(pkgDir, constName)
}
// resolveImportPath maps an import path onto a directory below repoRoot.
// Only imports under github.com/anchore/syft/ are supported; every other
// import path yields "".
func resolveImportPath(importPath, repoRoot string) string {
	const modulePrefix = "github.com/anchore/syft/"

	if !strings.HasPrefix(importPath, modulePrefix) {
		return ""
	}
	// what follows the module prefix is the repo-relative package directory
	return filepath.Join(repoRoot, importPath[len(modulePrefix):])
}
// findConstantInPackage looks through the non-test .go files directly inside
// pkgDir and returns the first non-empty value found for the constant
// constName, or "" when there is none.
func findConstantInPackage(pkgDir, constName string) string {
	candidates, err := filepath.Glob(filepath.Join(pkgDir, "*.go"))
	if err != nil {
		return ""
	}

	for _, candidate := range candidates {
		if strings.HasSuffix(candidate, "_test.go") {
			continue
		}
		found := findConstantInFile(candidate, constName)
		if found == "" {
			continue
		}
		return found
	}
	return ""
}
// findConstantInFile parses the Go file at filePath and returns the value of
// the constant constName as reported by searchConstInDecl for the first
// declaration that yields a non-empty result. "" is returned when the file
// cannot be parsed or no declaration matches.
func findConstantInFile(filePath, constName string) string {
	parsed, err := parser.ParseFile(token.NewFileSet(), filePath, nil, 0)
	if err != nil {
		return ""
	}

	for _, d := range parsed.Decls {
		found := searchConstInDecl(d, constName)
		if found == "" {
			continue
		}
		return found
	}
	return ""
}
// searchConstInDecl returns the value that getConstValue reports for constName
// in the first matching spec of decl. Declarations other than const blocks
// yield "".
func searchConstInDecl(decl ast.Decl, constName string) string {
	constDecl, isGen := decl.(*ast.GenDecl)
	if !isGen || constDecl.Tok != token.CONST {
		return ""
	}

	for _, s := range constDecl.Specs {
		if vs, isValue := s.(*ast.ValueSpec); isValue {
			if found := getConstValue(vs, constName); found != "" {
				return found
			}
		}
	}
	return ""
}
// getConstValue returns the string value bound to constName within a single
// const spec, or "" when the spec does not declare constName or its
// initialiser is not a string literal.
//
// The literal is decoded with strconv.Unquote so that raw (backtick) strings
// and escape sequences produce the value the compiler would see rather than
// their source spelling.
func getConstValue(valueSpec *ast.ValueSpec, constName string) string {
	for i, ident := range valueSpec.Names {
		// names and values are positional; skip names without an initialiser
		if ident.Name != constName || i >= len(valueSpec.Values) {
			continue
		}
		lit, ok := valueSpec.Values[i].(*ast.BasicLit)
		if !ok || lit.Kind != token.STRING {
			continue
		}
		if value, err := strconv.Unquote(lit.Value); err == nil {
			return value
		}
	}
	return ""
}
// RepoRoot finds the git repository root directory by running
// `git rev-parse --show-toplevel` and returns it as an absolute path.
// Exported for use by the generator in generate/main.go
//
// Both failure modes wrap the underlying error with %w, so callers can inspect
// the cause with errors.Is / errors.As (for example *exec.ExitError when git
// exits with a non-zero status).
func RepoRoot() (string, error) {
	out, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return "", fmt.Errorf("unable to find repo root dir: %w", err)
	}

	// git terminates its output with a newline
	absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(out)))
	if err != nil {
		return "", fmt.Errorf("unable to get abs path to repo root: %w", err)
	}
	return absRepoRoot, nil
}
// extractBinaryClassifiers returns every binary classifier, with its full
// information, exactly as provided by binary.DefaultClassifiers(); no filtering
// or re-ordering is applied here.
func extractBinaryClassifiers() []binary.Classifier {
	return binary.DefaultClassifiers()
}