update json schema with optional poweruser data shape

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-03-18 08:56:00 -04:00
parent 97f0f83544
commit 6a960ec1f3
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
12 changed files with 201 additions and 75 deletions

View File

@ -6,5 +6,5 @@ const (
// JSONSchemaVersion is the current schema version output by the JSON presenter // JSONSchemaVersion is the current schema version output by the JSON presenter
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "1.0.3" JSONSchemaVersion = "1.0.4"
) )

View File

@ -1,10 +1,10 @@
# JSON Schema # JSON Schema
This is the JSON schema for output from the JSON presenter (`syft <img> -o json`). The required inputs for defining the JSON schema are as follows: This is the JSON schema for output from the JSON presenters (`syft packages <img> -o json` and `syft power-user <img>`). The required inputs for defining the JSON schema are as follows:
- the value of `internal.JSONSchemaVersion` that governs the schema filename - the value of `internal.JSONSchemaVersion` that governs the schema filename
- the `Document` struct definition within `syft/presenters/json/document.go` that governs the overall document shape - the `Document` struct definition within `internal/presenters/poweruser/json_document.go` that governs the overall document shape
- the `metadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata` - the `artifactMetadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata`
With regard to testing the JSON schema, integration test cases provided by the developer are used as examples to validate that JSON output from Syft is always valid relative to the `schema/json/schema-$VERSION.json` file. With regard to testing the JSON schema, integration test cases provided by the developer are used as examples to validate that JSON output from Syft is always valid relative to the `schema/json/schema-$VERSION.json` file.
@ -26,7 +26,7 @@ When adding a new `pkg.*Metadata` that is assigned to the `pkg.Package.Metadata`
are done: are done:
- a new integration test case is added to `test/integration/pkg_cases_test.go` that exercises the new package type with the new metadata - a new integration test case is added to `test/integration/pkg_cases_test.go` that exercises the new package type with the new metadata
- the new metadata struct is added to the `metadataContainer` struct within `schema/json/generate.go` - the new metadata struct is added to the `artifactMetadataContainer` struct within `schema/json/generate.go`
## Generating a New Schema ## Generating a New Schema

View File

@ -6,13 +6,14 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"reflect"
"sort" "sort"
"strings" "strings"
"github.com/alecthomas/jsonschema" "github.com/alecthomas/jsonschema"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/presenter/poweruser"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
jsonPresenter "github.com/anchore/syft/syft/presenter/json"
) )
/* /*
@ -25,7 +26,7 @@ can be extended to include specific package metadata struct shapes in the future
// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}). // This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}).
// When a new package metadata definition is created it will need to be manually added here. The variable name does // When a new package metadata definition is created it will need to be manually added here. The variable name does
// not matter as long as it is exported. // not matter as long as it is exported.
type metadataContainer struct { type artifactMetadataContainer struct {
Apk pkg.ApkMetadata Apk pkg.ApkMetadata
Dpkg pkg.DpkgMetadata Dpkg pkg.DpkgMetadata
Gem pkg.GemMetadata Gem pkg.GemMetadata
@ -36,10 +37,23 @@ type metadataContainer struct {
Cargo pkg.CargoPackageMetadata Cargo pkg.CargoPackageMetadata
} }
// nolint:funlen
func main() { func main() {
metadataSchema := jsonschema.Reflect(&metadataContainer{}) write(encode(build()))
documentSchema := jsonschema.Reflect(&jsonPresenter.Document{}) }
func build() *jsonschema.Schema {
reflector := &jsonschema.Reflector{
AllowAdditionalProperties: true,
TypeNamer: func(r reflect.Type) string {
name := r.Name()
if strings.HasPrefix(name, "JSON") {
name = strings.TrimPrefix(name, "JSON")
}
return name
},
}
documentSchema := reflector.ReflectFromType(reflect.TypeOf(&poweruser.JSONDocument{}))
metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
// TODO: inject source definitions // TODO: inject source definitions
@ -47,7 +61,7 @@ func main() {
var metadataNames []string var metadataNames []string
for name, definition := range metadataSchema.Definitions { for name, definition := range metadataSchema.Definitions {
if name == "metadataContainer" { if name == "artifactMetadataContainer" {
// ignore the definition for the fake container // ignore the definition for the fake container
continue continue
} }
@ -71,22 +85,30 @@ func main() {
} }
// set the "anyOf" field for Package.Metadata to be a conjunction of several types // set the "anyOf" field for Package.Metadata to be a conjunction of several types
documentSchema.Definitions["Package"].Properties.Set("metadata", map[string][]map[string]string{ documentSchema.Definitions["Document"].Properties.Set("artifacts.metadata", map[string][]map[string]string{
"anyOf": metadataTypes, "anyOf": metadataTypes,
}) })
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion) return documentSchema
}
func encode(schema *jsonschema.Schema) []byte {
var newSchemaBuffer = new(bytes.Buffer) var newSchemaBuffer = new(bytes.Buffer)
enc := json.NewEncoder(newSchemaBuffer) enc := json.NewEncoder(newSchemaBuffer)
// prevent > and < from being escaped in the payload // prevent > and < from being escaped in the payload
enc.SetEscapeHTML(false) enc.SetEscapeHTML(false)
enc.SetIndent("", " ") enc.SetIndent("", " ")
err := enc.Encode(&documentSchema) err := enc.Encode(&schema)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return newSchemaBuffer.Bytes()
}
func write(schema []byte) {
filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
if _, err := os.Stat(filename); !os.IsNotExist(err) { if _, err := os.Stat(filename); !os.IsNotExist(err) {
// check if the schema is the same... // check if the schema is the same...
existingFh, err := os.Open(filename) existingFh, err := os.Open(filename)
@ -99,7 +121,7 @@ func main() {
panic(err) panic(err)
} }
if bytes.Equal(existingSchemaBytes, newSchemaBuffer.Bytes()) { if bytes.Equal(existingSchemaBytes, schema) {
// the generated schema is the same, bail with no error :) // the generated schema is the same, bail with no error :)
fmt.Println("No change to the existing schema!") fmt.Println("No change to the existing schema!")
os.Exit(0) os.Exit(0)
@ -115,7 +137,7 @@ func main() {
panic(err) panic(err)
} }
_, err = fh.Write(newSchemaBuffer.Bytes()) _, err = fh.Write(schema)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -749,4 +749,4 @@
"type": "object" "type": "object"
} }
} }
} }

View File

@ -36,7 +36,7 @@ var identityFiles = []parseEntry{
} }
// Identify parses distro-specific files to determine distro metadata like version and release. // Identify parses distro-specific files to determine distro metadata like version and release.
func Identify(resolver source.Resolver) *Distro { func Identify(resolver source.FileResolver) *Distro {
var distro *Distro var distro *Distro
identifyLoop: identifyLoop:

View File

@ -99,7 +99,12 @@ func TestIdentifyDistro(t *testing.T) {
t.Fatalf("unable to produce a new source for testing: %s", test.fixture) t.Fatalf("unable to produce a new source for testing: %s", test.fixture)
} }
d := Identify(s.Resolver) resolver, err := s.FileResolver(source.SquashedScope)
if err != nil {
t.Fatalf("unable to get resolver: %+v", err)
}
d := Identify(resolver)
if d == nil { if d == nil {
if test.Type == UnknownDistroType { if test.Type == UnknownDistroType {
return return

View File

@ -10,11 +10,11 @@ const (
// AppUpdateAvailable is a partybus event that occurs when an application update is available // AppUpdateAvailable is a partybus event that occurs when an application update is available
AppUpdateAvailable partybus.EventType = "syft-app-update-available" AppUpdateAvailable partybus.EventType = "syft-app-update-available"
// CatalogerStarted is a partybus event that occurs when the package cataloging has begun // PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun
CatalogerStarted partybus.EventType = "syft-cataloger-started-event" PackageCatalogerStarted partybus.EventType = "syft-cataloger-started-event"
// CatalogerFinished is a partybus event that occurs when the package cataloging has completed // PresenterReady is a partybus event that occurs when an analysis result is ready for final presentation
CatalogerFinished partybus.EventType = "syft-cataloger-finished-event" PresenterReady partybus.EventType = "syft-presenter-ready-event"
// ImportStarted is a partybus event that occurs when an SBOM upload process has begun // ImportStarted is a partybus event that occurs when an SBOM upload process has begun
ImportStarted partybus.EventType = "syft-import-started-event" ImportStarted partybus.EventType = "syft-import-started-event"

View File

@ -6,11 +6,12 @@ package parsers
import ( import (
"fmt" "fmt"
"github.com/anchore/syft/internal/presenter"
"github.com/wagoodman/go-progress" "github.com/wagoodman/go-progress"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/event" "github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/presenter" "github.com/anchore/syft/syft/pkg/cataloger"
"github.com/wagoodman/go-partybus" "github.com/wagoodman/go-partybus"
) )
@ -40,7 +41,7 @@ func checkEventType(actual, expected partybus.EventType) error {
} }
func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) { func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) {
if err := checkEventType(e.Type, event.CatalogerStarted); err != nil { if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil {
return nil, err return nil, err
} }
@ -52,8 +53,8 @@ func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) {
return &monitor, nil return &monitor, nil
} }
func ParseCatalogerFinished(e partybus.Event) (presenter.Presenter, error) { func ParsePresenterReady(e partybus.Event) (presenter.Presenter, error) {
if err := checkEventType(e.Type, event.CatalogerFinished); err != nil { if err := checkEventType(e.Type, event.PresenterReady); err != nil {
return nil, err return nil, err
} }

6
syft/file/digest.go Normal file
View File

@ -0,0 +1,6 @@
package file
// Digest is a single hash result for a file: the algorithm name (lowercased
// with dashes removed, e.g. "sha256" — see cleanAlgorithmName) paired with the
// hex-encoded digest value.
type Digest struct {
	Algorithm string `json:"algorithm"`
	Value     string `json:"value"`
}

View File

@ -0,0 +1,98 @@
package file
import (
"crypto"
"fmt"
"hash"
"io"
"strings"
"github.com/anchore/syft/syft/source"
)
// supportedHashAlgorithms maps cleaned algorithm names (see cleanAlgorithmName)
// to their crypto.Hash values; it is populated once by init().
var supportedHashAlgorithms = make(map[string]crypto.Hash)
// DigestsCataloger computes a set of file digests for every location known to
// the configured resolver.
type DigestsCataloger struct {
	resolver source.FileResolver // provides file locations and their contents
	hashes   []crypto.Hash       // hash algorithms applied to each file
}
func init() {
for _, h := range []crypto.Hash{
crypto.MD5,
crypto.SHA1,
crypto.SHA256,
} {
supportedHashAlgorithms[cleanAlgorithmName(h.String())] = h
}
}
// NewDigestsCataloger returns a DigestsCataloger backed by the given resolver
// that computes each of the named hash algorithms per file. An error is
// returned if any requested algorithm is not supported.
func NewDigestsCataloger(resolver source.FileResolver, hashAlgorithms []string) (*DigestsCataloger, error) {
	var hashes []crypto.Hash
	for _, requested := range hashAlgorithms {
		hashObj, ok := supportedHashAlgorithms[cleanAlgorithmName(requested)]
		if !ok {
			return nil, fmt.Errorf("unsupported hash algorithm: %s", requested)
		}
		hashes = append(hashes, hashObj)
	}

	cataloger := &DigestsCataloger{
		resolver: resolver,
		hashes:   hashes,
	}
	return cataloger, nil
}
// Catalog computes digests for every location the resolver knows about,
// returning a mapping of location to its digests. The first per-location
// failure aborts the walk.
func (i *DigestsCataloger) Catalog() (map[source.Location][]Digest, error) {
	results := make(map[source.Location][]Digest)
	for location := range i.resolver.AllLocations() {
		digests, err := i.catalogLocation(location)
		if err != nil {
			return nil, err
		}
		results[location] = digests
	}
	return results, nil
}
// catalogLocation computes all configured digests for a single file location
// in one pass over its contents. Empty files yield no digests: this decision
// is made based on observed SIZE and not FILE TYPE, since a tar can be crafted
// with a header-only file type while still carrying a body.
func (i *DigestsCataloger) catalogLocation(location source.Location) ([]Digest, error) {
	contentReader, err := i.resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer contentReader.Close()

	// create a set of hasher objects tied together with a single writer to feed content into
	hashers := make([]hash.Hash, len(i.hashes))
	writers := make([]io.Writer, len(i.hashes))
	for idx, hashObj := range i.hashes {
		hashers[idx] = hashObj.New()
		writers[idx] = hashers[idx]
	}

	size, err := io.Copy(io.MultiWriter(writers...), contentReader)
	if err != nil {
		// wrap with %w so callers can still unwrap the underlying I/O error
		return nil, fmt.Errorf("unable to observe contents of %+v: %w", location.RealPath, err)
	}

	if size == 0 {
		// fix: the previous implementation returned len(i.hashes) zero-valued
		// Digest entries (empty algorithm/value) for empty files; return no
		// digests at all instead.
		return nil, nil
	}

	result := make([]Digest, len(i.hashes))
	for idx, hasher := range hashers {
		result[idx] = Digest{
			Algorithm: cleanAlgorithmName(i.hashes[idx].String()),
			Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
		}
	}
	return result, nil
}
// cleanAlgorithmName normalizes a crypto.Hash display name (e.g. "SHA-256") to
// the lowercase, dash-free form used as supportedHashAlgorithms keys and in
// Digest.Algorithm (e.g. "sha256").
func cleanAlgorithmName(name string) string {
	// strings.ReplaceAll is the idiomatic form of strings.Replace(..., -1)
	return strings.ReplaceAll(strings.ToLower(name), "-", "")
}

View File

@ -0,0 +1,28 @@
package file
import (
"github.com/anchore/syft/syft/source"
)
// MetadataCataloger collects file metadata for every location known to the
// configured resolver.
type MetadataCataloger struct {
	resolver source.FileResolver // provides file locations and their metadata
}
// NewMetadataCataloger returns a MetadataCataloger backed by the given resolver.
func NewMetadataCataloger(resolver source.FileResolver) *MetadataCataloger {
	cataloger := MetadataCataloger{resolver: resolver}
	return &cataloger
}
// Catalog walks every location the resolver knows about and returns a mapping
// of location to the file metadata recorded for it. The first per-location
// failure aborts the walk.
func (i *MetadataCataloger) Catalog() (map[source.Location]source.FileMetadata, error) {
	results := make(map[source.Location]source.FileMetadata)
	for location := range i.resolver.AllLocations() {
		if err := i.collectInto(results, location); err != nil {
			return nil, err
		}
	}
	return results, nil
}

// collectInto looks up metadata for a single location and stores it in results.
func (i *MetadataCataloger) collectInto(results map[source.Location]source.FileMetadata, location source.Location) error {
	metadata, err := i.resolver.FileMetadataByLocation(location)
	if err != nil {
		return err
	}
	results[location] = metadata
	return nil
}

View File

@ -17,32 +17,29 @@ Similar to the cataloging process, Linux distribution identification is also per
package syft package syft
import ( import (
"encoding/json"
"fmt" "fmt"
"io"
"github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/distro" "github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/logger" "github.com/anchore/syft/syft/logger"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
jsonPresenter "github.com/anchore/syft/syft/presenter/json" "github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/wagoodman/go-partybus" "github.com/wagoodman/go-partybus"
) )
// Catalog the given image from a particular perspective (e.g. squashed source, all-layers source). Returns the discovered // CatalogPackages takes an inventory of packages from the given image from a particular perspective
// set of packages, the identified Linux distribution, and the source object used to wrap the data source. // (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux
func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog, *distro.Distro, error) { // distribution, and the source object used to wrap the data source.
theSource, cleanup, err := source.New(userInput, scope) func CatalogPackages(src source.Source, scope source.Scope) (*pkg.Catalog, *distro.Distro, error) {
defer cleanup() resolver, err := src.FileResolver(scope)
if err != nil { if err != nil {
return source.Source{}, nil, nil, err return nil, nil, fmt.Errorf("unable to determine FileResolver while cataloging packages: %w", err)
} }
// find the distro // find the distro
theDistro := distro.Identify(theSource.Resolver) theDistro := distro.Identify(resolver)
if theDistro != nil { if theDistro != nil {
log.Infof("identified distro: %s", theDistro.String()) log.Infof("identified distro: %s", theDistro.String())
} else { } else {
@ -51,7 +48,7 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
// conditionally use the correct set of loggers based on the input type (container image or directory) // conditionally use the correct set of loggers based on the input type (container image or directory)
var catalogers []cataloger.Cataloger var catalogers []cataloger.Cataloger
switch theSource.Metadata.Scheme { switch src.Metadata.Scheme {
case source.ImageScheme: case source.ImageScheme:
log.Info("cataloging image") log.Info("cataloging image")
catalogers = cataloger.ImageCatalogers() catalogers = cataloger.ImageCatalogers()
@ -59,46 +56,15 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
log.Info("cataloging directory") log.Info("cataloging directory")
catalogers = cataloger.DirectoryCatalogers() catalogers = cataloger.DirectoryCatalogers()
default: default:
return source.Source{}, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", theSource.Metadata.Scheme) return nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme)
} }
catalog, err := cataloger.Catalog(theSource.Resolver, theDistro, catalogers...) catalog, err := cataloger.Catalog(resolver, theDistro, catalogers...)
if err != nil { if err != nil {
return source.Source{}, nil, nil, err return nil, nil, err
} }
return theSource, catalog, theDistro, nil return catalog, theDistro, nil
}
// CatalogFromJSON takes an existing syft report and generates native syft objects.
func CatalogFromJSON(reader io.Reader) (source.Metadata, *pkg.Catalog, *distro.Distro, error) {
var doc jsonPresenter.Document
var err error
decoder := json.NewDecoder(reader)
if err := decoder.Decode(&doc); err != nil {
return source.Metadata{}, nil, nil, err
}
var pkgs = make([]pkg.Package, len(doc.Artifacts))
for i, a := range doc.Artifacts {
pkgs[i], err = a.ToPackage()
if err != nil {
return source.Metadata{}, nil, nil, err
}
}
catalog := pkg.NewCatalog(pkgs...)
var theDistro *distro.Distro
if doc.Distro.Name != "" {
d, err := distro.NewDistro(distro.Type(doc.Distro.Name), doc.Distro.Version, doc.Distro.IDLike)
if err != nil {
return source.Metadata{}, nil, nil, err
}
theDistro = &d
}
return doc.Source.ToSourceMetadata(), catalog, theDistro, nil
} }
// SetLogger sets the logger object used for all syft logging calls. // SetLogger sets the logger object used for all syft logging calls.