diff --git a/syft/distro/distro.go b/syft/distro/distro.go
index 7e01b2493..446357401 100644
--- a/syft/distro/distro.go
+++ b/syft/distro/distro.go
@@ -57,5 +57,5 @@ func (d Distro) String() string {
 
 // Name provides a string repr of the distro
 func (d Distro) Name() string {
-	return d.Type.String()
+	return string(d.Type)
 }
diff --git a/syft/distro/identify_test.go b/syft/distro/identify_test.go
index fc61965c2..4371e20dc 100644
--- a/syft/distro/identify_test.go
+++ b/syft/distro/identify_test.go
@@ -78,8 +78,8 @@ func TestIdentifyDistro(t *testing.T) {
 	observedDistros := internal.NewStringSet()
 	definedDistros := internal.NewStringSet()
 
-	for _, d := range All {
-		definedDistros.Add(d.String())
+	for _, distroType := range All {
+		definedDistros.Add(string(distroType))
 	}
 
 	for _, test := range tests {
diff --git a/syft/distro/type.go b/syft/distro/type.go
index 0f42fca07..9b6f4c035 100644
--- a/syft/distro/type.go
+++ b/syft/distro/type.go
@@ -1,37 +1,23 @@
 package distro
 
+// Type represents a Linux distribution family (e.g. debian, alpine) as a string value.
+type Type string
+
 const (
-	UnknownDistroType Type = iota
-	Debian
-	Ubuntu
-	RedHat
-	CentOS
-	Fedora
-	Alpine
-	Busybox
-	AmazonLinux
-	OracleLinux
-	ArchLinux
-	OpenSuseLeap
+	UnknownDistroType Type = "UnknownDistroType"
+	Debian            Type = "debian"
+	Ubuntu            Type = "ubuntu"
+	RedHat            Type = "redhat"
+	CentOS            Type = "centos"
+	Fedora            Type = "fedora"
+	Alpine            Type = "alpine"
+	Busybox           Type = "busybox"
+	AmazonLinux       Type = "amazonlinux"
+	OracleLinux       Type = "oraclelinux"
+	ArchLinux         Type = "archlinux"
+	OpenSuseLeap      Type = "opensuseleap"
 )
 
-type Type int
-
-var distroStr = []string{
-	"UnknownDistroType",
-	"debian",
-	"ubuntu",
-	"redhat",
-	"centos",
-	"fedora",
-	"alpine",
-	"busybox",
-	"amazn",
-	"oraclelinux",
-	"archlinux",
-	"opensuse-leap",
-}
-
 var All = []Type{
 	Debian,
 	Ubuntu,
@@ -46,14 +32,6 @@ var All = []Type{
 	OpenSuseLeap,
 }
 
-func (t Type) String() string {
-	if int(t) >= len(distroStr) || t < 0 {
-		return distroStr[0]
-	}
-
-	return distroStr[t]
-}
-
 // IDMapping connects a distro ID like "ubuntu" to a Distro type
 var IDMapping = map[string]Type{
 	"debian":        Debian,
@@ -68,3 +46,8 @@ var IDMapping = map[string]Type{
 	"arch":          ArchLinux,
 	"opensuse-leap": OpenSuseLeap,
 }
+
+// String returns the distro type as a plain string.
+func (t Type) String() string {
+	return string(t)
+}
diff --git a/syft/lib.go b/syft/lib.go
index cb7dc66a1..87f69fe1e 100644
--- a/syft/lib.go
+++ b/syft/lib.go
@@ -17,7 +17,9 @@ Similar to the cataloging process, Linux distribution identification is also per
 package syft
 
 import (
+	"encoding/json"
 	"fmt"
+	"io"
 
 	"github.com/anchore/syft/internal/bus"
 	"github.com/anchore/syft/internal/log"
@@ -25,6 +27,7 @@ import (
 	"github.com/anchore/syft/syft/distro"
 	"github.com/anchore/syft/syft/logger"
 	"github.com/anchore/syft/syft/pkg"
+	jsonPresenter "github.com/anchore/syft/syft/presenter/json"
 	"github.com/anchore/syft/syft/scope"
 	"github.com/wagoodman/go-partybus"
 )
@@ -79,6 +82,34 @@ func CatalogFromScope(s scope.Scope) (*pkg.Catalog, error) {
 	return cataloger.Catalog(s.Resolver, catalogers...)
 }
 
+// CatalogFromJSON rebuilds a package catalog and distro from a syft JSON document read from the given reader.
+// Artifacts are converted back with Artifact.ToPackage, so found-by and location details are not restored.
+func CatalogFromJSON(reader io.Reader) (*pkg.Catalog, *distro.Distro, error) {
+	var doc jsonPresenter.Document
+	if err := json.NewDecoder(reader).Decode(&doc); err != nil {
+		return nil, nil, fmt.Errorf("unable to decode JSON document: %w", err)
+	}
+
+	pkgs := make([]pkg.Package, len(doc.Artifacts))
+	for i, a := range doc.Artifacts {
+		pkgs[i] = a.ToPackage()
+	}
+
+	// the JSON presenter writes an empty name for unknown distros, so map it back here
+	distroType := distro.UnknownDistroType
+	if doc.Distro.Name != "" {
+		distroType = distro.Type(doc.Distro.Name)
+	}
+
+	d, err := distro.NewDistro(distroType, doc.Distro.Version, doc.Distro.IDLike)
+	if err != nil {
+		return nil, nil, fmt.Errorf("unable to create distro from JSON document: %w", err)
+	}
+
+	// TODO: the document source (e.g. image metadata) is decoded but not yet returned to the caller
+	return pkg.NewCatalog(pkgs...), &d, nil
+}
+
 // SetLogger sets the logger object used for all syft logging calls.
 func SetLogger(logger logger.Logger) {
 	log.Log = logger
diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go
index 5afde2998..0e7ae49e0 100644
--- a/syft/pkg/catalog.go
+++ b/syft/pkg/catalog.go
@@ -19,12 +19,18 @@ type Catalog struct {
 }
 
-// NewCatalog returns a new empty Catalog
-func NewCatalog() *Catalog {
-	return &Catalog{
+// NewCatalog returns a new Catalog populated with the given packages (empty when none are given).
+func NewCatalog(pkgs ...Package) *Catalog {
+	catalog := Catalog{
 		byID:   make(map[ID]*Package),
 		byType: make(map[Type][]*Package),
 		byFile: make(map[file.Reference][]*Package),
 	}
+
+	for _, p := range pkgs {
+		catalog.Add(p)
+	}
+
+	return &catalog
 }
 
 // PackageCount returns the total number of packages that have been added.
@@ -111,6 +117,9 @@ func (c *Catalog) Sorted(types ...Type) []*Package {
 
 	sort.SliceStable(pkgs, func(i, j int) bool {
 		if pkgs[i].Name == pkgs[j].Name {
+			if pkgs[i].Version == pkgs[j].Version {
+				return pkgs[i].Type < pkgs[j].Type
+			}
 			return pkgs[i].Version < pkgs[j].Version
 		}
 		return pkgs[i].Name < pkgs[j].Name
diff --git a/syft/pkg/language.go b/syft/pkg/language.go
index 75032937d..6fc1b184a 100644
--- a/syft/pkg/language.go
+++ b/syft/pkg/language.go
@@ -1,25 +1,17 @@
 package pkg
 
+// Language represents the language ecosystem a package belongs to (e.g. java, python).
+type Language string
+
 const (
-	UnknownLanguage Language = iota
-	Java
-	JavaScript
-	Python
-	Ruby
-	Go
+	UnknownLanguage Language = "UnknownLanguage"
+	Java            Language = "java"
+	JavaScript      Language = "javascript"
+	Python          Language = "python"
+	Ruby            Language = "ruby"
+	Go              Language = "go"
 )
 
-type Language uint
-
-var languageStr = []string{
-	"UnknownLanguage",
-	"java",
-	"javascript",
-	"python",
-	"ruby",
-	"go",
-}
-
 var AllLanguages = []Language{
 	Java,
 	JavaScript,
@@ -28,9 +20,7 @@ var AllLanguages = []Language{
 	Go,
 }
 
-func (t Language) String() string {
-	if int(t) >= len(languageStr) {
-		return languageStr[0]
-	}
-	return languageStr[t]
+// String returns the language as a plain string.
+func (l Language) String() string {
+	return string(l)
 }
diff --git a/syft/pkg/package.go b/syft/pkg/package.go
index c3224a07f..805c9fb6c 100644
--- a/syft/pkg/package.go
+++ b/syft/pkg/package.go
@@ -23,11 +23,11 @@ type Package struct {
 	FoundBy string           `json:"foundBy"` // the specific cataloger that discovered this package
 	Source  []file.Reference `json:"sources"` // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package)
 	// TODO: should we move licenses into metadata?
-	Licenses     []string     `json:"licenses"`           // licenses discovered with the package metadata
-	Language     Language     `json:"language"`           // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
-	Type         Type         `json:"type"`               // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
-	MetadataType MetadataType `json:"metadataType"`       // the shape of the additional data in the "metadata" field
-	Metadata     interface{}  `json:"metadata,omitempty"` // additional data found while parsing the package source
+	Licenses     []string     `json:"licenses"`               // licenses discovered with the package metadata
+	Language     Language     `json:"language"`               // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc)
+	Type         Type         `json:"type"`                   // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc)
+	MetadataType MetadataType `json:"metadataType,omitempty"` // the shape of the additional data in the "metadata" field
+	Metadata     interface{}  `json:"metadata,omitempty"`     // additional data found while parsing the package source
 }
 
 // ID returns the package ID, which is unique relative to a package catalog.
diff --git a/syft/presenter/json/artifact.go b/syft/presenter/json/artifact.go
index 3c7957cef..a031e33d2 100644
--- a/syft/presenter/json/artifact.go
+++ b/syft/presenter/json/artifact.go
@@ -1,18 +1,40 @@
 package json
 
 import (
+	"encoding/json"
+	"fmt"
+
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/scope"
 )
 
+// Artifact is the JSON representation of a single package: basic fields plus type-specific metadata.
 type Artifact struct {
-	Name      string      `json:"name"`
-	Version   string      `json:"version"`
-	Type      string      `json:"type"`
-	FoundBy   []string    `json:"foundBy"`
-	Locations Locations   `json:"locations,omitempty"`
-	Licenses  []string    `json:"licenses"`
-	Metadata  interface{} `json:"metadata,omitempty"`
+	ArtifactBasicMetadata
+	ArtifactCustomMetadata
+}
+
+// ArtifactBasicMetadata holds the package fields that are common to every artifact.
+type ArtifactBasicMetadata struct {
+	Name      string    `json:"name"`
+	Version   string    `json:"version"`
+	Type      string    `json:"type"`
+	FoundBy   []string  `json:"foundBy"`
+	Locations Locations `json:"locations,omitempty"`
+	Licenses  []string  `json:"licenses"`
+	Language  string    `json:"language"`
+}
+
+// ArtifactCustomMetadata holds the metadata payload along with the type describing its shape.
+type ArtifactCustomMetadata struct {
+	MetadataType pkg.MetadataType `json:"metadataType"`
+	Metadata     interface{}      `json:"metadata,omitempty"`
+}
+
+// ArtifactMetadataUnpacker is a decoding helper that defers parsing of the metadata payload until its type is known.
+type ArtifactMetadataUnpacker struct {
+	MetadataType string          `json:"metadataType"`
+	Metadata     json.RawMessage `json:"metadata"`
 }
 
 func NewArtifact(p *pkg.Package, s scope.Scope) (Artifact, error) {
@@ -22,12 +44,100 @@ func NewArtifact(p *pkg.Package, s scope.Scope) (Artifact, error) {
 	}
 
 	return Artifact{
-		Name:      p.Name,
-		Version:   p.Version,
-		Type:      string(p.Type),
-		FoundBy:   []string{p.FoundBy},
-		Locations: locations,
-		Licenses:  p.Licenses,
-		Metadata:  p.Metadata,
+		ArtifactBasicMetadata: ArtifactBasicMetadata{
+			Name:      p.Name,
+			Version:   p.Version,
+			Type:      string(p.Type),
+			FoundBy:   []string{p.FoundBy},
+			Locations: locations,
+			Licenses:  p.Licenses,
+			Language:  string(p.Language),
+		},
+		ArtifactCustomMetadata: ArtifactCustomMetadata{
+			MetadataType: p.MetadataType,
+			Metadata:     p.Metadata,
+		},
 	}, nil
 }
+
+// ToPackage converts the artifact back into a pkg.Package (found-by and locations are not carried over).
+func (a Artifact) ToPackage() pkg.Package {
+	return pkg.Package{
+		// does not include found-by and locations
+		Name:         a.Name,
+		Version:      a.Version,
+		Licenses:     a.Licenses,
+		Language:     pkg.Language(a.Language),
+		Type:         pkg.Type(a.Type),
+		MetadataType: a.MetadataType,
+		Metadata:     a.Metadata,
+	}
+}
+
+// UnmarshalJSON decodes an artifact, using the "metadataType" field to pick the concrete struct for "metadata".
+func (a *Artifact) UnmarshalJSON(b []byte) error {
+	var basic ArtifactBasicMetadata
+	if err := json.Unmarshal(b, &basic); err != nil {
+		return err
+	}
+	a.ArtifactBasicMetadata = basic
+
+	var unpacker ArtifactMetadataUnpacker
+	if err := json.Unmarshal(b, &unpacker); err != nil {
+		return err
+	}
+
+	a.MetadataType = pkg.MetadataType(unpacker.MetadataType)
+
+	switch a.MetadataType {
+	case pkg.RpmdbMetadataType:
+		var payload pkg.RpmdbMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.PythonPackageMetadataType:
+		var payload pkg.PythonPackageMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.DpkgMetadataType:
+		var payload pkg.DpkgMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.ApkMetadataType:
+		var payload pkg.ApkMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.JavaMetadataType:
+		var payload pkg.JavaMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.NpmPackageJSONMetadataType:
+		var payload pkg.NpmPackageJSONMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case pkg.GemMetadataType:
+		var payload pkg.GemMetadata
+		if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil {
+			return err
+		}
+		a.Metadata = payload
+	case "":
+		// there may be packages with no metadata, which is OK
+	default:
+		return fmt.Errorf("unsupported package metadata type: %+v", a.MetadataType)
+
+	}
+
+	return nil
+}
diff --git a/syft/presenter/json/document.go b/syft/presenter/json/document.go
index 713dc7d7a..e1ca6dc51 100644
--- a/syft/presenter/json/document.go
+++ b/syft/presenter/json/document.go
@@ -1,18 +1,31 @@
 package json
 
 import (
+	"time"
+
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/internal/version"
 	"github.com/anchore/syft/syft/distro"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/scope"
 )
 
 type Document struct {
-	Artifacts []Artifact   `json:"artifacts"`
-	Source    Source       `json:"source"`
-	Distro    Distribution `json:"distro"`
+	Artifacts  []Artifact   `json:"artifacts"`
+	Source     Source       `json:"source"`
+	Distro     Distribution `json:"distro"`
+	Descriptor Descriptor   `json:"descriptor"`
 }
 
-// Distritbution provides information about a detected Linux Distribution
+// Descriptor describes what created the document as well as surrounding metadata
+type Descriptor struct {
+	Name            string `json:"name"`
+	Version         string `json:"version"`
+	ReportTimestamp string `json:"reportTimestamp"`
+	// TODO: we should include scope option here as well (or in source)
+}
+
+// Distribution provides information about a detected Linux Distribution
 type Distribution struct {
 	Name    string `json:"name"`
 	Version string `json:"version"`
@@ -20,23 +33,30 @@ type Distribution struct {
 }
 
+// NewDocument builds the JSON document for the given catalog, scope, and distro.
 func NewDocument(catalog *pkg.Catalog, s scope.Scope, d distro.Distro) (Document, error) {
-	doc := Document{
-		Artifacts: make([]Artifact, 0),
-	}
-
 	src, err := NewSource(s)
 	if err != nil {
-		return Document{}, nil
+		return Document{}, err
 	}
-	doc.Source = src
+
 	distroName := d.Name()
 	if distroName == "UnknownDistroType" {
 		distroName = ""
 	}
-	doc.Distro = Distribution{
-		Name:    distroName,
-		Version: d.FullVersion(),
-		IDLike:  d.IDLike,
+
+	doc := Document{
+		Artifacts: make([]Artifact, 0),
+		Source:    src,
+		Distro: Distribution{
+			Name:    distroName,
+			Version: d.FullVersion(),
+			IDLike:  d.IDLike,
+		},
+		Descriptor: Descriptor{
+			Name:            internal.ApplicationName,
+			Version:         version.FromBuild().Version,
+			ReportTimestamp: time.Now().Format(time.RFC3339),
+		},
 	}
 
 	for _, p := range catalog.Sorted() {
diff --git a/syft/presenter/json/presenter_test.go b/syft/presenter/json/presenter_test.go
index b935050a2..f60d53dba 100644
--- a/syft/presenter/json/presenter_test.go
+++ b/syft/presenter/json/presenter_test.go
@@ -25,11 +25,18 @@ func TestJsonDirsPresenter(t *testing.T) {
 	catalog.Add(pkg.Package{
 		Name:    "package-1",
 		Version: "1.0.1",
-		Type:    pkg.DebPkg,
+		Type:    pkg.PythonPkg,
 		FoundBy: "the-cataloger-1",
 		Source: []file.Reference{
 			{Path: "/some/path/pkg1"},
 		},
+		Language:     pkg.Python,
+		MetadataType: pkg.PythonPackageMetadataType,
+		Licenses:     []string{"MIT"},
+		Metadata: pkg.PythonPackageMetadata{
+			Name:    "package-1",
+			Version: "1.0.1",
+		},
 	})
 	catalog.Add(pkg.Package{
 		Name:    "package-2",
@@ -39,6 +46,11 @@ func TestJsonDirsPresenter(t *testing.T) {
 		Source: []file.Reference{
 			{Path: "/some/path/pkg1"},
 		},
+		MetadataType: pkg.DpkgMetadataType,
+		Metadata: pkg.DpkgMetadata{
+			Package: "package-2",
+			Version: "2.0.1",
+		},
 	})
 	d := distro.NewUnknownDistro()
 	s, err := scope.NewScopeFromDir("/some/path")
@@ -87,8 +99,15 @@ func TestJsonImgsPresenter(t *testing.T) {
 		Source: []file.Reference{
 			*img.SquashedTree().File("/somefile-1.txt"),
 		},
-		Type:    pkg.DebPkg,
-		FoundBy: "the-cataloger-1",
+		Type:         pkg.PythonPkg,
+		FoundBy:      "the-cataloger-1",
+		Language:     pkg.Python,
+		MetadataType: pkg.PythonPackageMetadataType,
+		Licenses:     []string{"MIT"},
+		Metadata: pkg.PythonPackageMetadata{
+			Name:    "package-1",
+			Version: "1.0.1",
+		},
 	})
 	catalog.Add(pkg.Package{
 		Name:    "package-2",
@@ -96,8 +115,13 @@ func TestJsonImgsPresenter(t *testing.T) {
 		Source: []file.Reference{
 			*img.SquashedTree().File("/somefile-2.txt"),
 		},
-		Type:    pkg.DebPkg,
-		FoundBy: "the-cataloger-2",
+		Type:         pkg.DebPkg,
+		FoundBy:      "the-cataloger-2",
+		MetadataType: pkg.DpkgMetadataType,
+		Metadata: pkg.DpkgMetadata{
+			Package: "package-2",
+			Version: "2.0.1",
+		},
 	})
 
 	s, err := scope.NewScopeFromImage(img, scope.AllLayersScope)
diff --git a/syft/presenter/json/source.go b/syft/presenter/json/source.go
index 471390999..d103bc567 100644
--- a/syft/presenter/json/source.go
+++ b/syft/presenter/json/source.go
@@ -1,6 +1,7 @@
 package json
 
 import (
+	"encoding/json"
 	"fmt"
 
 	"github.com/anchore/syft/syft/scope"
@@ -11,6 +12,12 @@ type Source struct {
 	Target interface{} `json:"target"`
 }
 
+// SourceUnpacker is a decoding helper that defers parsing of the target payload until the source type is known.
+type SourceUnpacker struct {
+	Type   string          `json:"type"`
+	Target json.RawMessage `json:"target"`
+}
+
 func NewSource(s scope.Scope) (Source, error) {
 	switch src := s.Source.(type) {
 	case scope.ImageSource:
@@ -27,3 +34,27 @@ func NewSource(s scope.Scope) (Source, error) {
 		return Source{}, fmt.Errorf("unsupported source: %T", src)
 	}
 }
+
+// UnmarshalJSON decodes a Source, using the "type" field to select the concrete type for the "target" payload.
+func (s *Source) UnmarshalJSON(b []byte) error {
+	var unpacker SourceUnpacker
+	if err := json.Unmarshal(b, &unpacker); err != nil {
+		return err
+	}
+
+	s.Type = unpacker.Type
+
+	switch s.Type {
+	case "image":
+		var payload Image
+		if err := json.Unmarshal(unpacker.Target, &payload); err != nil {
+			return err
+		}
+		s.Target = payload
+	default:
+		// NOTE(review): only image sources can be decoded so far; any other source type is rejected
+		return fmt.Errorf("unsupported source type: %q", s.Type)
+	}
+
+	return nil
+}
diff --git a/test/integration/document_import_test.go b/test/integration/document_import_test.go
new file mode 100644
index 000000000..f42492a21
--- /dev/null
+++ b/test/integration/document_import_test.go
@@ -0,0 +1,92 @@
+package integration
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/anchore/stereoscope/pkg/imagetest"
+	"github.com/anchore/syft/syft"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/presenter/json"
+	"github.com/anchore/syft/syft/scope"
+	"github.com/go-test/deep"
+)
+
+func TestCatalogFromJSON(t *testing.T) {
+
+	// ensure each of our fixture images results in roughly the same shape when:
+	//     generate json -> import json -> assert packages and distro are the same (except for select fields)
+
+	tests := []struct {
+		fixture string
+	}{
+		{
+			fixture: "image-pkg-coverage",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.fixture, func(t *testing.T) {
+			_, cleanup := imagetest.GetFixtureImage(t, "docker-archive", test.fixture)
+			tarPath := imagetest.GetFixtureImageTarPath(t, test.fixture)
+			defer cleanup()
+
+			expectedCatalog, s, expectedDistro, err := syft.Catalog("docker-archive:"+tarPath, scope.AllLayersScope)
+			if err != nil {
+				t.Fatalf("failed to catalog image: %+v", err)
+			}
+
+			var buf bytes.Buffer
+			jsonPres := json.NewPresenter(expectedCatalog, *s, *expectedDistro)
+			if err = jsonPres.Present(&buf); err != nil {
+				t.Fatalf("failed to write to presenter: %+v", err)
+			}
+
+			// TODO: test img
+
+			actualCatalog, actualDistro, err := syft.CatalogFromJSON(&buf)
+			if err != nil {
+				t.Fatalf("failed to import document: %+v", err)
+			}
+
+			for _, d := range deep.Equal(actualDistro, expectedDistro) {
+				t.Errorf("   distro diff: %+v", d)
+			}
+
+			var actualPackages, expectedPackages []*pkg.Package
+
+			// TODO: take out pkg.RpmdbMetadataType filter
+
+			for _, p := range expectedCatalog.Sorted() {
+				expectedPackages = append(expectedPackages, p)
+			}
+
+			for _, p := range actualCatalog.Sorted() {
+				actualPackages = append(actualPackages, p)
+			}
+
+			if len(actualPackages) != len(expectedPackages) {
+				t.Fatalf("mismatched package length: %d != %d", len(actualPackages), len(expectedPackages))
+			}
+
+			for i, e := range expectedPackages {
+				a := actualPackages[i]
+
+				// omit fields that should be missing
+				e.Source = nil
+				e.FoundBy = ""
+				if e.MetadataType == pkg.JavaMetadataType {
+					metadata := e.Metadata.(pkg.JavaMetadata)
+					metadata.Parent = nil
+					e.Metadata = metadata
+				}
+
+				for _, d := range deep.Equal(a, e) {
+					t.Errorf("   package %d (name=%s) diff: %+v", i, e.Name, d)
+				}
+			}
+
+		})
+	}
+
+}