diff --git a/internal/constants.go b/internal/constants.go
index 5cb49659a..b027f8502 100644
--- a/internal/constants.go
+++ b/internal/constants.go
@@ -6,5 +6,5 @@ const (
// JSONSchemaVersion is the current schema version output by the JSON presenter
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
- JSONSchemaVersion = "1.0.3"
+ JSONSchemaVersion = "1.0.4"
)
diff --git a/schema/json/README.md b/schema/json/README.md
index 957bb2bec..e9fe920db 100644
--- a/schema/json/README.md
+++ b/schema/json/README.md
@@ -1,10 +1,10 @@
# JSON Schema
-This is the JSON schema for output from the JSON presenter (`syft -o json`). The required inputs for defining the JSON schema are as follows:
+This is the JSON schema for output from the JSON presenters (`syft packages -o json` and `syft power-user`). The required inputs for defining the JSON schema are as follows:
- the value of `internal.JSONSchemaVersion` that governs the schema filename
-- the `Document` struct definition within `syft/presenters/json/document.go` that governs the overall document shape
-- the `metadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata`
+- the `Document` struct definition within `internal/presenters/poweruser/json_document.go` that governs the overall document shape
+- the `artifactMetadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata`
With regard to testing the JSON schema, integration test cases provided by the developer are used as examples to validate that JSON output from Syft is always valid relative to the `schema/json/schema-$VERSION.json` file.
@@ -26,7 +26,7 @@ When adding a new `pkg.*Metadata` that is assigned to the `pkg.Package.Metadata`
are done:
- a new integration test case is added to `test/integration/pkg_cases_test.go` that exercises the new package type with the new metadata
-- the new metadata struct is added to the `metadataContainer` struct within `schema/json/generate.go`
+- the new metadata struct is added to the `artifactMetadataContainer` struct within `schema/json/generate.go`
## Generating a New Schema
diff --git a/schema/json/generate.go b/schema/json/generate.go
index 4896e1924..dd65c45e6 100644
--- a/schema/json/generate.go
+++ b/schema/json/generate.go
@@ -6,13 +6,14 @@ import (
"fmt"
"io/ioutil"
"os"
+ "reflect"
"sort"
"strings"
"github.com/alecthomas/jsonschema"
"github.com/anchore/syft/internal"
+ "github.com/anchore/syft/internal/presenter/poweruser"
"github.com/anchore/syft/syft/pkg"
- jsonPresenter "github.com/anchore/syft/syft/presenter/json"
)
/*
@@ -25,7 +26,7 @@ can be extended to include specific package metadata struct shapes in the future
// This should represent all possible metadatas represented in the pkg.Package.Metadata field (an interface{}).
// When a new package metadata definition is created it will need to be manually added here. The variable name does
// not matter as long as it is exported.
-type metadataContainer struct {
+type artifactMetadataContainer struct {
Apk pkg.ApkMetadata
Dpkg pkg.DpkgMetadata
Gem pkg.GemMetadata
@@ -36,10 +37,23 @@ type metadataContainer struct {
Cargo pkg.CargoPackageMetadata
}
-// nolint:funlen
func main() {
- metadataSchema := jsonschema.Reflect(&metadataContainer{})
- documentSchema := jsonschema.Reflect(&jsonPresenter.Document{})
+ write(encode(build()))
+}
+
+func build() *jsonschema.Schema {
+ reflector := &jsonschema.Reflector{
+ AllowAdditionalProperties: true,
+ TypeNamer: func(r reflect.Type) string {
+ name := r.Name()
+ if strings.HasPrefix(name, "JSON") {
+ name = strings.TrimPrefix(name, "JSON")
+ }
+ return name
+ },
+ }
+ documentSchema := reflector.ReflectFromType(reflect.TypeOf(&poweruser.JSONDocument{}))
+ metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&artifactMetadataContainer{}))
// TODO: inject source definitions
@@ -47,7 +61,7 @@ func main() {
var metadataNames []string
for name, definition := range metadataSchema.Definitions {
- if name == "metadataContainer" {
+ if name == "artifactMetadataContainer" {
// ignore the definition for the fake container
continue
}
@@ -71,22 +85,30 @@ func main() {
}
// set the "anyOf" field for Package.Metadata to be a conjunction of several types
- documentSchema.Definitions["Package"].Properties.Set("metadata", map[string][]map[string]string{
+ documentSchema.Definitions["Document"].Properties.Set("artifacts.metadata", map[string][]map[string]string{
"anyOf": metadataTypes,
})
- filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
+ return documentSchema
+}
+func encode(schema *jsonschema.Schema) []byte {
var newSchemaBuffer = new(bytes.Buffer)
enc := json.NewEncoder(newSchemaBuffer)
// prevent > and < from being escaped in the payload
enc.SetEscapeHTML(false)
enc.SetIndent("", " ")
- err := enc.Encode(&documentSchema)
+ err := enc.Encode(&schema)
if err != nil {
panic(err)
}
+ return newSchemaBuffer.Bytes()
+}
+
+func write(schema []byte) {
+ filename := fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)
+
if _, err := os.Stat(filename); !os.IsNotExist(err) {
// check if the schema is the same...
existingFh, err := os.Open(filename)
@@ -99,7 +121,7 @@ func main() {
panic(err)
}
- if bytes.Equal(existingSchemaBytes, newSchemaBuffer.Bytes()) {
+ if bytes.Equal(existingSchemaBytes, schema) {
// the generated schema is the same, bail with no error :)
fmt.Println("No change to the existing schema!")
os.Exit(0)
@@ -115,7 +137,7 @@ func main() {
panic(err)
}
- _, err = fh.Write(newSchemaBuffer.Bytes())
+ _, err = fh.Write(schema)
if err != nil {
panic(err)
}
diff --git a/schema/json/schema-1.0.3.json b/schema/json/schema-1.0.3.json
index de3d7f933..108ba7479 100644
--- a/schema/json/schema-1.0.3.json
+++ b/schema/json/schema-1.0.3.json
@@ -749,4 +749,4 @@
"type": "object"
}
}
-}
+}
\ No newline at end of file
diff --git a/syft/distro/identify.go b/syft/distro/identify.go
index fc6fe52bc..72be74b97 100644
--- a/syft/distro/identify.go
+++ b/syft/distro/identify.go
@@ -36,7 +36,7 @@ var identityFiles = []parseEntry{
}
// Identify parses distro-specific files to determine distro metadata like version and release.
-func Identify(resolver source.Resolver) *Distro {
+func Identify(resolver source.FileResolver) *Distro {
var distro *Distro
identifyLoop:
diff --git a/syft/distro/identify_test.go b/syft/distro/identify_test.go
index 3e73aa2c6..d0d4470ee 100644
--- a/syft/distro/identify_test.go
+++ b/syft/distro/identify_test.go
@@ -99,7 +99,12 @@ func TestIdentifyDistro(t *testing.T) {
t.Fatalf("unable to produce a new source for testing: %s", test.fixture)
}
- d := Identify(s.Resolver)
+ resolver, err := s.FileResolver(source.SquashedScope)
+ if err != nil {
+ t.Fatalf("unable to get resolver: %+v", err)
+ }
+
+ d := Identify(resolver)
if d == nil {
if test.Type == UnknownDistroType {
return
diff --git a/syft/event/event.go b/syft/event/event.go
index a88d38422..8c6faefc1 100644
--- a/syft/event/event.go
+++ b/syft/event/event.go
@@ -10,11 +10,11 @@ const (
// AppUpdateAvailable is a partybus event that occurs when an application update is available
AppUpdateAvailable partybus.EventType = "syft-app-update-available"
- // CatalogerStarted is a partybus event that occurs when the package cataloging has begun
- CatalogerStarted partybus.EventType = "syft-cataloger-started-event"
+ // PackageCatalogerStarted is a partybus event that occurs when the package cataloging has begun
+ PackageCatalogerStarted partybus.EventType = "syft-cataloger-started-event"
- // CatalogerFinished is a partybus event that occurs when the package cataloging has completed
- CatalogerFinished partybus.EventType = "syft-cataloger-finished-event"
+ // PresenterReady is a partybus event that occurs when an analysis result is ready for final presentation
+ PresenterReady partybus.EventType = "syft-presenter-ready-event"
// ImportStarted is a partybus event that occurs when an SBOM upload process has begun
ImportStarted partybus.EventType = "syft-import-started-event"
diff --git a/syft/event/parsers/parsers.go b/syft/event/parsers/parsers.go
index 1eddcf245..a5d02a2f0 100644
--- a/syft/event/parsers/parsers.go
+++ b/syft/event/parsers/parsers.go
@@ -6,11 +6,12 @@ package parsers
import (
"fmt"
+ "github.com/anchore/syft/internal/presenter"
+
"github.com/wagoodman/go-progress"
- "github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/event"
- "github.com/anchore/syft/syft/presenter"
+ "github.com/anchore/syft/syft/pkg/cataloger"
"github.com/wagoodman/go-partybus"
)
@@ -40,7 +41,7 @@ func checkEventType(actual, expected partybus.EventType) error {
}
func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) {
- if err := checkEventType(e.Type, event.CatalogerStarted); err != nil {
+ if err := checkEventType(e.Type, event.PackageCatalogerStarted); err != nil {
return nil, err
}
@@ -52,8 +53,8 @@ func ParseCatalogerStarted(e partybus.Event) (*cataloger.Monitor, error) {
return &monitor, nil
}
-func ParseCatalogerFinished(e partybus.Event) (presenter.Presenter, error) {
- if err := checkEventType(e.Type, event.CatalogerFinished); err != nil {
+func ParsePresenterReady(e partybus.Event) (presenter.Presenter, error) {
+ if err := checkEventType(e.Type, event.PresenterReady); err != nil {
return nil, err
}
diff --git a/syft/file/digest.go b/syft/file/digest.go
new file mode 100644
index 000000000..87b53dbb8
--- /dev/null
+++ b/syft/file/digest.go
@@ -0,0 +1,6 @@
+package file
+
+type Digest struct {
+ Algorithm string `json:"algorithm"`
+ Value string `json:"value"`
+}
diff --git a/syft/file/digest_cataloger.go b/syft/file/digest_cataloger.go
new file mode 100644
index 000000000..37a8ff1b9
--- /dev/null
+++ b/syft/file/digest_cataloger.go
@@ -0,0 +1,98 @@
+package file
+
+import (
+ "crypto"
+ "fmt"
+ "hash"
+ "io"
+ "strings"
+
+ "github.com/anchore/syft/syft/source"
+)
+
+var supportedHashAlgorithms = make(map[string]crypto.Hash)
+
+type DigestsCataloger struct {
+ resolver source.FileResolver
+ hashes []crypto.Hash
+}
+
+func init() {
+ for _, h := range []crypto.Hash{
+ crypto.MD5,
+ crypto.SHA1,
+ crypto.SHA256,
+ } {
+ supportedHashAlgorithms[cleanAlgorithmName(h.String())] = h
+ }
+}
+
+func NewDigestsCataloger(resolver source.FileResolver, hashAlgorithms []string) (*DigestsCataloger, error) {
+ var hashes []crypto.Hash
+ for _, hashStr := range hashAlgorithms {
+ name := cleanAlgorithmName(hashStr)
+ hashObj, ok := supportedHashAlgorithms[name]
+ if !ok {
+ return nil, fmt.Errorf("unsupported hash algorithm: %s", hashStr)
+ }
+ hashes = append(hashes, hashObj)
+ }
+
+ return &DigestsCataloger{
+ resolver: resolver,
+ hashes: hashes,
+ }, nil
+}
+
+func (i *DigestsCataloger) Catalog() (map[source.Location][]Digest, error) {
+ results := make(map[source.Location][]Digest)
+ for location := range i.resolver.AllLocations() {
+ result, err := i.catalogLocation(location)
+ if err != nil {
+ return nil, err
+ }
+ results[location] = result
+ }
+ return results, nil
+}
+
+func (i *DigestsCataloger) catalogLocation(location source.Location) ([]Digest, error) {
+ contentReader, err := i.resolver.FileContentsByLocation(location)
+ if err != nil {
+ return nil, err
+ }
+ defer contentReader.Close()
+
+ // create a set of hasher objects tied together with a single writer to feed content into
+ hashers := make([]hash.Hash, len(i.hashes))
+ writers := make([]io.Writer, len(i.hashes))
+ for idx, hashObj := range i.hashes {
+ hashers[idx] = hashObj.New()
+ writers[idx] = hashers[idx]
+ }
+
+ size, err := io.Copy(io.MultiWriter(writers...), contentReader)
+ if err != nil {
+ return nil, fmt.Errorf("unable to observe contents of %+v: %+v", location.RealPath, err)
+ }
+
+ result := make([]Digest, len(i.hashes))
+ if size > 0 {
+ // only capture digests when there is content. It is important to do this based on SIZE and not
+ // FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
+ // file type but a body is still allowed.
+ for idx, hasher := range hashers {
+ result[idx] = Digest{
+ Algorithm: cleanAlgorithmName(i.hashes[idx].String()),
+ Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
+ }
+ }
+ }
+
+ return result, nil
+}
+
+func cleanAlgorithmName(name string) string {
+ lower := strings.ToLower(name)
+ return strings.Replace(lower, "-", "", -1)
+}
diff --git a/syft/file/metadata_cataloger.go b/syft/file/metadata_cataloger.go
new file mode 100644
index 000000000..8f0565902
--- /dev/null
+++ b/syft/file/metadata_cataloger.go
@@ -0,0 +1,28 @@
+package file
+
+import (
+ "github.com/anchore/syft/syft/source"
+)
+
+type MetadataCataloger struct {
+ resolver source.FileResolver
+}
+
+func NewMetadataCataloger(resolver source.FileResolver) *MetadataCataloger {
+ return &MetadataCataloger{
+ resolver: resolver,
+ }
+}
+
+func (i *MetadataCataloger) Catalog() (map[source.Location]source.FileMetadata, error) {
+ results := make(map[source.Location]source.FileMetadata)
+ for location := range i.resolver.AllLocations() {
+ metadata, err := i.resolver.FileMetadataByLocation(location)
+ if err != nil {
+ return nil, err
+ }
+
+ results[location] = metadata
+ }
+ return results, nil
+}
diff --git a/syft/lib.go b/syft/lib.go
index 7f63e2210..9b9bfd965 100644
--- a/syft/lib.go
+++ b/syft/lib.go
@@ -17,32 +17,29 @@ Similar to the cataloging process, Linux distribution identification is also per
package syft
import (
- "encoding/json"
"fmt"
- "io"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/log"
- "github.com/anchore/syft/syft/cataloger"
"github.com/anchore/syft/syft/distro"
"github.com/anchore/syft/syft/logger"
"github.com/anchore/syft/syft/pkg"
- jsonPresenter "github.com/anchore/syft/syft/presenter/json"
+ "github.com/anchore/syft/syft/pkg/cataloger"
"github.com/anchore/syft/syft/source"
"github.com/wagoodman/go-partybus"
)
-// Catalog the given image from a particular perspective (e.g. squashed source, all-layers source). Returns the discovered
-// set of packages, the identified Linux distribution, and the source object used to wrap the data source.
-func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog, *distro.Distro, error) {
- theSource, cleanup, err := source.New(userInput, scope)
- defer cleanup()
+// CatalogPackages takes an inventory of packages from the given image from a particular perspective
+// (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux
+// distribution, and the source object used to wrap the data source.
+func CatalogPackages(src source.Source, scope source.Scope) (*pkg.Catalog, *distro.Distro, error) {
+ resolver, err := src.FileResolver(scope)
if err != nil {
- return source.Source{}, nil, nil, err
+ return nil, nil, fmt.Errorf("unable to determine FileResolver while cataloging packages: %w", err)
}
// find the distro
- theDistro := distro.Identify(theSource.Resolver)
+ theDistro := distro.Identify(resolver)
if theDistro != nil {
log.Infof("identified distro: %s", theDistro.String())
} else {
@@ -51,7 +48,7 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
// conditionally use the correct set of loggers based on the input type (container image or directory)
var catalogers []cataloger.Cataloger
- switch theSource.Metadata.Scheme {
+ switch src.Metadata.Scheme {
case source.ImageScheme:
log.Info("cataloging image")
catalogers = cataloger.ImageCatalogers()
@@ -59,46 +56,15 @@ func Catalog(userInput string, scope source.Scope) (source.Source, *pkg.Catalog,
log.Info("cataloging directory")
catalogers = cataloger.DirectoryCatalogers()
default:
- return source.Source{}, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", theSource.Metadata.Scheme)
+ return nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme)
}
- catalog, err := cataloger.Catalog(theSource.Resolver, theDistro, catalogers...)
+ catalog, err := cataloger.Catalog(resolver, theDistro, catalogers...)
if err != nil {
- return source.Source{}, nil, nil, err
+ return nil, nil, err
}
- return theSource, catalog, theDistro, nil
-}
-
-// CatalogFromJSON takes an existing syft report and generates native syft objects.
-func CatalogFromJSON(reader io.Reader) (source.Metadata, *pkg.Catalog, *distro.Distro, error) {
- var doc jsonPresenter.Document
- var err error
- decoder := json.NewDecoder(reader)
- if err := decoder.Decode(&doc); err != nil {
- return source.Metadata{}, nil, nil, err
- }
-
- var pkgs = make([]pkg.Package, len(doc.Artifacts))
- for i, a := range doc.Artifacts {
- pkgs[i], err = a.ToPackage()
- if err != nil {
- return source.Metadata{}, nil, nil, err
- }
- }
-
- catalog := pkg.NewCatalog(pkgs...)
-
- var theDistro *distro.Distro
- if doc.Distro.Name != "" {
- d, err := distro.NewDistro(distro.Type(doc.Distro.Name), doc.Distro.Version, doc.Distro.IDLike)
- if err != nil {
- return source.Metadata{}, nil, nil, err
- }
- theDistro = &d
- }
-
- return doc.Source.ToSourceMetadata(), catalog, theDistro, nil
+ return catalog, theDistro, nil
}
// SetLogger sets the logger object used for all syft logging calls.