Add test to ensure package metadata is represented in the JSON schema (#1841)

* [wip] try to reflect metadata types... probably won't work

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* refactor to add unit test to ensure there is coverage in the schema

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* [wip] generate metadata container

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add generation of metadata container struct for JSON schema generation

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix linting

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update linter script to account for code generation

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

---------

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2023-05-25 13:26:56 -04:00 committed by GitHub
parent 6afbffce28
commit 74013d7da7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 296 additions and 62 deletions

View File

@ -1,27 +1,17 @@
#!/usr/bin/env bash
set -u
if ! git diff-index --quiet HEAD --; then
git diff-index HEAD --
git --no-pager diff
echo "there are uncommitted changes, please commit them before running this check"
if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
echo " 🔴 there are uncommitted changes, please commit them before running this check"
exit 1
fi
success=true
if ! make generate-json-schema; then
echo "Generating json schema failed"
success=false
fi
if ! git diff-index --quiet HEAD --; then
git diff-index HEAD --
git --no-pager diff
echo "JSON schema drift detected!"
success=false
fi
if ! $success; then
exit 1
fi
if [ "$(git status --porcelain | wc -l)" -ne "0" ]; then
echo " 🔴 there are uncommitted changes, please commit them before running this check"
exit 1
fi

View File

@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR)
.PHONY: generate-json-schema
generate-json-schema: ## Generate a new json schema
cd schema/json && go run generate.go
cd schema/json && go generate . && go run .
.PHONY: generate-license-list
generate-license-list: ## Generate an updated spdx license list

1
go.mod
View File

@ -54,6 +54,7 @@ require (
github.com/Masterminds/sprig/v3 v3.2.3
github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8
github.com/anchore/stereoscope v0.0.0-20230522170632-e14bc4437b2e
github.com/dave/jennifer v1.6.1
github.com/deitch/magic v0.0.0-20230404182410-1ff89d7342da
github.com/docker/docker v24.0.1+incompatible
github.com/github/go-spdx/v2 v2.1.2

2
go.sum
View File

@ -155,6 +155,8 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/dave/jennifer v1.6.1 h1:T4T/67t6RAA5AIV6+NP8Uk/BIsXgDoqEowgycdQQLuk=
github.com/dave/jennifer v1.6.1/go.mod h1:nXbxhEmQfOZhWml3D1cDK5M1FLnMSozpbFN/m3RmGZc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@ -0,0 +1,50 @@
package main
import (
"fmt"
"os"
"github.com/dave/jennifer/jen"
"github.com/anchore/syft/schema/json/internal"
)
// This program generates internal/generated.go.
const (
// pkgImport is the import path of the package whose types are referenced by the fields of the generated struct.
pkgImport = "github.com/anchore/syft/syft/pkg"
// path is where the generated source is written; it is relative, so it resolves against the working directory of the process.
path = "internal/generated.go"
)
// main regenerates the ArtifactMetadataContainer struct definition and writes
// the rendered Go source to path.
//
// The struct receives one field per name returned by
// internal.AllSyftMetadataTypeNames; each field is named after the type and is
// typed as the identically named type from pkgImport. Any failure panics with
// a wrapped error so the generation run aborts with a non-zero exit.
func main() {
	typeNames, err := internal.AllSyftMetadataTypeNames()
	if err != nil {
		panic(fmt.Errorf("unable to get all metadata type names: %w", err))
	}

	fmt.Printf("updating metadata container object with %+v types\n", len(typeNames))

	f := jen.NewFile("internal")
	f.HeaderComment("DO NOT EDIT: generated by schema/json/generate/main.go")
	f.ImportName(pkgImport, "pkg")
	f.Comment("ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.")
	f.Type().Id("ArtifactMetadataContainer").StructFunc(func(g *jen.Group) {
		for _, typeName := range typeNames {
			// field name and type name are intentionally identical (e.g. "ApkMetadata pkg.ApkMetadata")
			g.Id(typeName).Qual(pkgImport, typeName)
		}
	})

	// %#v invokes the file's GoString implementation, which yields the rendered source
	rendered := fmt.Sprintf("%#v", f)

	// os.WriteFile creates/truncates, writes and closes in a single call, so the
	// file handle can never be left open when the write fails
	if err := os.WriteFile(path, []byte(rendered), 0644); err != nil {
		panic(fmt.Errorf("unable to write file: %w", err))
	}
}

View File

@ -0,0 +1,39 @@
// DO NOT EDIT: generated by schema/json/generate/main.go
package internal
import "github.com/anchore/syft/syft/pkg"
// ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.
type ArtifactMetadataContainer struct {
AlpmMetadata pkg.AlpmMetadata
ApkMetadata pkg.ApkMetadata
BinaryMetadata pkg.BinaryMetadata
CargoPackageMetadata pkg.CargoPackageMetadata
CocoapodsMetadata pkg.CocoapodsMetadata
ConanLockMetadata pkg.ConanLockMetadata
ConanMetadata pkg.ConanMetadata
DartPubMetadata pkg.DartPubMetadata
DotnetDepsMetadata pkg.DotnetDepsMetadata
DpkgMetadata pkg.DpkgMetadata
GemMetadata pkg.GemMetadata
GolangBinMetadata pkg.GolangBinMetadata
GolangModMetadata pkg.GolangModMetadata
HackageMetadata pkg.HackageMetadata
JavaMetadata pkg.JavaMetadata
KbPackageMetadata pkg.KbPackageMetadata
LinuxKernelMetadata pkg.LinuxKernelMetadata
LinuxKernelModuleMetadata pkg.LinuxKernelModuleMetadata
MixLockMetadata pkg.MixLockMetadata
NixStoreMetadata pkg.NixStoreMetadata
NpmPackageJSONMetadata pkg.NpmPackageJSONMetadata
NpmPackageLockJSONMetadata pkg.NpmPackageLockJSONMetadata
PhpComposerJSONMetadata pkg.PhpComposerJSONMetadata
PortageMetadata pkg.PortageMetadata
PythonPackageMetadata pkg.PythonPackageMetadata
PythonPipfileLockMetadata pkg.PythonPipfileLockMetadata
PythonRequirementsMetadata pkg.PythonRequirementsMetadata
RDescriptionFileMetadata pkg.RDescriptionFileMetadata
RebarLockMetadata pkg.RebarLockMetadata
RpmMetadata pkg.RpmMetadata
}

View File

@ -0,0 +1,150 @@
package internal
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"os/exec"
"path/filepath"
"sort"
"strings"
"unicode"
"github.com/scylladb/go-set/strset"
)
// metadataExceptions holds type names that isMetadataTypeCandidate rejects even though they are
// exported and end with "Metadata".
// NOTE(review): presumably FileMetadata is listed because it does not describe a package — confirm.
var metadataExceptions = strset.New(
"FileMetadata",
)
// AllSyftMetadataTypeNames returns the sorted names of the top-level metadata
// struct types declared in the syft/pkg directory of the enclosing git
// repository.
//
// The repository root is resolved via git, every file matching syft/pkg/*.go
// (non-recursive) is parsed, and the discovered names are returned. An error
// is returned when the root cannot be resolved, no source file matches the
// pattern, a file fails to parse, or too few definitions are discovered.
func AllSyftMetadataTypeNames() ([]string, error) {
	root, err := repoRoot()
	if err != nil {
		return nil, err
	}

	pattern := filepath.Join(root, "syft/pkg/*.go")
	files, err := filepath.Glob(pattern)
	if err != nil {
		return nil, fmt.Errorf("unable to glob %q: %w", pattern, err)
	}
	// fail early with the offending pattern instead of the less specific
	// "not enough metadata definitions" error raised further down
	if len(files) == 0 {
		return nil, fmt.Errorf("no go source files found matching %q", pattern)
	}

	return findMetadataDefinitionNames(files...)
}
// repoRoot returns the absolute path of the top-level directory of the git
// repository that contains the current working directory.
//
// It runs `git rev-parse --show-toplevel`, so the git binary must be on PATH
// and the process must be running inside a work tree. Failures are wrapped
// with %w so callers can inspect the cause with errors.Is / errors.As.
func repoRoot() (string, error) {
	out, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return "", fmt.Errorf("unable to find repo root dir: %w", err)
	}

	// git terminates its output with a newline which must not end up in the path
	absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(out)))
	if err != nil {
		return "", fmt.Errorf("unable to get abs path to repo root: %w", err)
	}
	return absRepoRoot, nil
}
// findMetadataDefinitionNames parses each of the given Go source files and
// returns the sorted, de-duplicated names of the metadata struct candidates
// found across all of them.
//
// A candidate whose name is also referenced from the fields of another
// candidate is dropped, since it is a nested helper type rather than a
// top-level metadata definition. An error is returned when a file cannot be
// parsed or when fewer definitions than expected are discovered.
func findMetadataDefinitionNames(paths ...string) ([]string, error) {
	// note: 30 is a point-in-time gut check. This number could be updated if new metadata definitions are added, but is not required.
	// it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions.
	const minExpectedDefinitions = 30

	names := strset.New()
	usedNames := strset.New()
	for _, path := range paths {
		metadataDefinitions, usedTypeNames, err := findMetadataDefinitionNamesInFile(path)
		if err != nil {
			return nil, err
		}

		// useful for debugging...
		// fmt.Println(path)
		// fmt.Println("Defs:", metadataDefinitions)
		// fmt.Println("Used Types:", usedTypeNames)
		// fmt.Println()

		names.Add(metadataDefinitions...)
		usedNames.Add(usedTypeNames...)
	}

	// any definition that is used within another struct should not be considered a top-level metadata definition
	names.Remove(usedNames.List()...)

	// set iteration order is not stable, so sort for deterministic output
	strNames := names.List()
	sort.Strings(strNames)

	if len(strNames) < minExpectedDefinitions {
		return nil, fmt.Errorf("not enough metadata definitions found (discovered: %d)", len(strNames))
	}

	return strNames, nil
}
// findMetadataDefinitionNamesInFile parses the Go source file at path and
// returns two lists: the names of the declared struct types that qualify as
// metadata candidates, and every identifier referenced from the field types of
// those candidates. A parse failure is returned as-is.
func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error) {
	fileSet := token.NewFileSet()
	parsed, err := parser.ParseFile(fileSet, path, nil, parser.ParseComments)
	if err != nil {
		return nil, nil, err
	}

	var (
		definitions []string
		referenced  []string
	)

	for _, decl := range parsed.Decls {
		// only "type ..." declarations can introduce struct definitions
		genDecl, isGenDecl := decl.(*ast.GenDecl)
		if !isGenDecl || genDecl.Tok != token.TYPE {
			continue
		}

		// a single declaration may be a parenthesized group of several type specs
		for _, candidate := range genDecl.Specs {
			typeSpec, isTypeSpec := candidate.(*ast.TypeSpec)
			if !isTypeSpec || typeSpec.Type == nil {
				continue
			}

			// anything other than a struct (aliases, interfaces, ...) is irrelevant here
			structType, isStruct := typeSpec.Type.(*ast.StructType)
			if !isStruct {
				continue
			}

			name := typeSpec.Name.String()
			if !isMetadataTypeCandidate(name) {
				continue
			}

			// record the definition along with the type names its fields depend on
			definitions = append(definitions, name)
			referenced = append(referenced, typeNamesUsedInStruct(structType)...)
		}
	}

	return definitions, referenced, nil
}
// typeNamesUsedInStruct returns, in traversal order, the name of every
// identifier reachable from the type expression of each field of structType.
// The names of the struct's own fields are not included, but every identifier
// inside a field's type is (for example both "pkg" and "Foo" for pkg.Foo).
// Duplicates are preserved.
func typeNamesUsedInStruct(structType *ast.StructType) []string {
	var collected []string

	// visitor that records identifiers and always descends further
	collect := func(node ast.Node) bool {
		if ident, isIdent := node.(*ast.Ident); isIdent {
			collected = append(collected, ident.Name)
		}
		return true
	}

	for _, field := range structType.Fields.List {
		// walk only the type expression so the field's own names are skipped
		ast.Inspect(field.Type, collect)
	}

	return collected
}
// isMetadataTypeCandidate reports whether name could be a top-level metadata
// type: it must end with "Metadata", must not be listed in
// metadataExceptions, and must be exported (first character is an upper case
// letter). The empty string is never a candidate.
func isMetadataTypeCandidate(name string) bool {
	if !strings.HasSuffix(name, "Metadata") || metadataExceptions.Has(name) {
		return false
	}

	// must be exported: IndexFunc decodes runes, so an index of 0 means the
	// first *character* is upper case. Converting the first byte to a rune
	// would misclassify identifiers that start with a multi-byte character.
	return strings.IndexFunc(name, unicode.IsUpper) == 0
}

View File

@ -13,8 +13,8 @@ import (
"github.com/invopop/jsonschema"
"github.com/anchore/syft/internal"
genInt "github.com/anchore/syft/schema/json/internal"
syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model"
"github.com/anchore/syft/syft/pkg"
)
/*
@ -24,46 +24,9 @@ are not captured (empty interfaces). This means that pkg.Package.Metadata is not
can be extended to include specific package metadata struct shapes in the future.
*/
// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}).
// When a new package metadata definition is created it will need to be manually added here. The variable name does
// not matter as long as it is exported.
//go:generate go run ./generate/main.go
// TODO: this should be generated from reflection of whats in the pkg package
// Should be created during generation below; use reflection's ability to
// create types at runtime.
// should be same name as struct minus metadata
type artifactMetadataContainer struct {
Alpm pkg.AlpmMetadata
Apk pkg.ApkMetadata
Binary pkg.BinaryMetadata
Cocopods pkg.CocoapodsMetadata
Conan pkg.ConanMetadata
ConanLock pkg.ConanLockMetadata
Dart pkg.DartPubMetadata
Dotnet pkg.DotnetDepsMetadata
Dpkg pkg.DpkgMetadata
Gem pkg.GemMetadata
GoBin pkg.GolangBinMetadata
GoMod pkg.GolangModMetadata
Hackage pkg.HackageMetadata
Java pkg.JavaMetadata
KbPackage pkg.KbPackageMetadata
LinuxKernel pkg.LinuxKernelMetadata
LinuxKernelModule pkg.LinuxKernelModuleMetadata
Nix pkg.NixStoreMetadata
NpmPackage pkg.NpmPackageJSONMetadata
NpmPackageLock pkg.NpmPackageLockJSONMetadata
MixLock pkg.MixLockMetadata
Php pkg.PhpComposerJSONMetadata
Portage pkg.PortageMetadata
PythonPackage pkg.PythonPackageMetadata
PythonPipfilelock pkg.PythonPipfileLockMetadata
PythonRequirements pkg.PythonRequirementsMetadata
RDescriptionFile pkg.RDescriptionFileMetadata
Rebar pkg.RebarLockMetadata
Rpm pkg.RpmMetadata
RustCargo pkg.CargoPackageMetadata
}
const schemaVersion = internal.JSONSchemaVersion
func main() {
write(encode(build()))
@ -77,14 +40,14 @@ func build() *jsonschema.Schema {
},
}
documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&genInt.ArtifactMetadataContainer{}))
// TODO: inject source definitions
// inject the definitions of all metadatas into the schema definitions
var metadataNames []string
for name, definition := range metadataSchema.Definitions {
if name == "artifactMetadataContainer" {
if name == reflect.TypeOf(genInt.ArtifactMetadataContainer{}).Name() {
// ignore the definition for the fake container
continue
}
@ -130,7 +93,7 @@ func encode(schema *jsonschema.Schema) []byte {
}
func write(schema []byte) {
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
filename := fmt.Sprintf("schema-%s.json", schemaVersion)
if _, err := os.Stat(filename); !os.IsNotExist(err) {
// check if the schema is the same...
@ -167,5 +130,5 @@ func write(schema []byte) {
defer fh.Close()
fmt.Printf("wrote new schema to %q\n", filename)
fmt.Printf("Wrote new schema to %q\n", filename)
}

39
schema/json/main_test.go Normal file
View File

@ -0,0 +1,39 @@
package main
import (
"reflect"
"sort"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/schema/json/internal"
)
// TestAllMetadataRepresented verifies that the checked-in, generated
// ArtifactMetadataContainer struct has exactly one field type for every
// metadata type name discovered by internal.AllSyftMetadataTypeNames. A
// mismatch means the set of metadata types has drifted from the generated
// struct that drives the JSON schema content.
func TestAllMetadataRepresented(t *testing.T) {
	want, err := internal.AllSyftMetadataTypeNames()
	require.NoError(t, err)

	got := allTypeNamesFromStruct(internal.ArtifactMetadataContainer{})

	if assert.ElementsMatch(t, want, got) {
		// generated struct and discovered types agree, nothing more to report
		return
	}

	// give the developer a readable diff plus a hint on how to resolve the drift
	t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(want, got))
	t.Log("did you add a new pkg.*Metadata type without updating the JSON schema?")
	t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)")
}
// allTypeNamesFromStruct returns the sorted type names of the direct fields of
// the given struct (nested fields are not visited). Unnamed field types such
// as slices or maps contribute an empty string, as reported by
// reflect.Type.Name.
//
// A pointer to a struct is dereferenced first. For nil or any non-struct value
// an empty result is returned instead of panicking.
func allTypeNamesFromStruct(instance any) []string {
	tt := reflect.TypeOf(instance)

	// unwrap pointers so both T{} and &T{} are accepted
	for tt != nil && tt.Kind() == reflect.Ptr {
		tt = tt.Elem()
	}

	// NumField panics on anything that is not a struct
	if tt == nil || tt.Kind() != reflect.Struct {
		return nil
	}

	typeNames := make([]string, 0, tt.NumField())
	for i := 0; i < tt.NumField(); i++ {
		typeNames = append(typeNames, tt.Field(i).Type.Name())
	}

	sort.Strings(typeNames)
	return typeNames
}