diff --git a/go.mod b/go.mod index ae5094679..038fe513e 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.14 require ( github.com/adrg/xdg v0.2.1 github.com/alecthomas/jsonschema v0.0.0-20200530073317-71f438968921 - github.com/anchore/client-go v0.0.0-20201216213038-a486b838e238 + github.com/anchore/client-go v0.0.0-20210222170800-9c70f9b80bcf github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12 github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b diff --git a/go.sum b/go.sum index aafeaa465..8267da7eb 100644 --- a/go.sum +++ b/go.sum @@ -126,20 +126,14 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= -github.com/anchore/client-go v0.0.0-20201216213038-a486b838e238 h1:/iI+1cj1a27ow0wj378pPJIm8sCSy6I21Tz6oLbLDQY= -github.com/anchore/client-go v0.0.0-20201216213038-a486b838e238/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk= +github.com/anchore/client-go v0.0.0-20210222170800-9c70f9b80bcf h1:DYssiUV1pBmKqzKsm4mqXx8artqC0Q8HgZsVI3lMsAg= +github.com/anchore/client-go v0.0.0-20210222170800-9c70f9b80bcf/go.mod h1:FaODhIA06mxO1E6R32JE0TL1JWZZkmjRIAd4ULvHUKk= github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12 h1:xbeIbn5F52JVx3RUIajxCj8b0y+9lywspql4sFhcxWQ= github.com/anchore/go-rpmdb v0.0.0-20201106153645-0043963c2e12/go.mod h1:juoyWXIj7sJ1IDl4E/KIfyLtovbs5XQVSIdaQifFQT8= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04 h1:VzprUTpc0vW0nnNKJfJieyH/TZ9UYAnTZs5/gHTdAe8= github.com/anchore/go-testutils v0.0.0-20200925183923-d5f45b0d3c04/go.mod h1:6dK64g27Qi1qGQZ67gFmBFvEHScy0/C8qhQhNe5B5pQ= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZVsCYMrIZBpFxwV26CbsuoEh5muXD5I1Ods= github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= -github.com/anchore/stereoscope v0.0.0-20210104203718-4c1d1bd9a255 h1:Ng7BDr9PQTCztANogjfEdEjjWUylhlPyZPhtarIGo00= -github.com/anchore/stereoscope v0.0.0-20210104203718-4c1d1bd9a255/go.mod h1:BMdPL0QEIYfpjQ3M7sHYZvuh6+vcomqF3TMHL8gr6Vw= -github.com/anchore/stereoscope v0.0.0-20210105000809-428eda0b2ec6 h1:JWpsV/8x1fuCYjJmNjT43cVFblLTpO/ISDnePukiTNw= -github.com/anchore/stereoscope v0.0.0-20210105000809-428eda0b2ec6/go.mod h1:BMdPL0QEIYfpjQ3M7sHYZvuh6+vcomqF3TMHL8gr6Vw= -github.com/anchore/stereoscope v0.0.0-20210105001222-7beea73cb7e5 h1:NGRfS6BZKElgiMbqdoH9iQn+6oxT7CJdZYrqgwvGkWY= -github.com/anchore/stereoscope v0.0.0-20210105001222-7beea73cb7e5/go.mod h1:BMdPL0QEIYfpjQ3M7sHYZvuh6+vcomqF3TMHL8gr6Vw= github.com/anchore/stereoscope v0.0.0-20210201165248-e94c52b4052d h1:2hv5NOZ0fD8tPk1UdGiW9PHxmjBmBLL+sFlhLXjjKgo= github.com/anchore/stereoscope v0.0.0-20210201165248-e94c52b4052d/go.mod h1:lhSEYyGLXTXMIFHAz7Ls/MNQ5EjYd5ziLxovKZp1xOs= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= diff --git a/internal/anchore/import_package_sbom_test.go b/internal/anchore/import_package_sbom_test.go index e97cd549d..becb74bec 100644 --- a/internal/anchore/import_package_sbom_test.go +++ b/internal/anchore/import_package_sbom_test.go @@ -107,6 +107,10 @@ func TestPackageSbomToModel(t *testing.T) { } for _, d := range deep.Equal(actualDoc, expectedDoc) { + if strings.HasSuffix(d, " != []") { + // do not consider nil vs empty collection semantics as a "difference" + continue + } t.Errorf("diff: %+v", d) } } diff --git a/internal/constants.go b/internal/constants.go index 675381dad..faddcf2ec 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -6,5 +6,5 @@ const ( // JSONSchemaVersion is the current schema version output by the JSON presenter // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. - JSONSchemaVersion = "1.0.1" + JSONSchemaVersion = "1.0.2" ) diff --git a/schema/json/schema-1.0.2.json b/schema/json/schema-1.0.2.json new file mode 100644 index 000000000..640081d48 --- /dev/null +++ b/schema/json/schema-1.0.2.json @@ -0,0 +1,718 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Document", + "definitions": { + "ApkFileRecord": { + "required": [ + "path" + ], + "properties": { + "path": { + "type": "string" + }, + "ownerUid": { + "type": "string" + }, + "ownerGid": { + "type": "string" + }, + "permissions": { + "type": "string" + }, + "checksum": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "ApkMetadata": { + "required": [ + "package", + "originPackage", + "maintainer", + "version", + "license", + "architecture", + "url", + "description", + "size", + "installedSize", + "pullDependencies", + "pullChecksum", + "gitCommitOfApkPort", + "files" + ], + "properties": { + "package": { + "type": "string" + }, + "originPackage": { + "type": "string" + }, + "maintainer": { + "type": "string" + }, + "version": { + "type": "string" + }, + "license": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "url": { + "type": "string" + }, + "description": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "installedSize": { + "type": "integer" + }, + "pullDependencies": { + "type": "string" + }, + "pullChecksum": { + "type": "string" + }, + "gitCommitOfApkPort": { + "type": "string" + }, + "files": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/ApkFileRecord" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Descriptor": { + "required": [ + "name", + "version" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Distribution": { + "required": [ + "name", + "version", + "idLike" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "idLike": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Document": { + "required": [ + "artifacts", + "source", + "distro", + "descriptor", + "schema", + "artifactRelationships" + ], + "properties": { + "artifacts": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Package" + }, + "type": "array" + }, + "source": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Source" + }, + "distro": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Distribution" + }, + "descriptor": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Descriptor" + }, + "schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Schema" + }, + "artifactRelationships": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Relationship" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object" + }, + "DpkgFileRecord": { + "required": [ + "path", + "md5" + ], + "properties": { + "path": { + "type": "string" + }, + "md5": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "DpkgMetadata": { + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ], + "properties": { + "package": { + "type": "string" + }, + "source": { + "type": "string" + }, + "version": { + "type": "string" + }, + "sourceVersion": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "maintainer": { + "type": "string" + }, + "installedSize": { + "type": "integer" + }, + "files": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/DpkgFileRecord" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object" + }, + "GemMetadata": { + "required": [ + "name", + "version" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array" + }, + "authors": { + "items": { + "type": "string" + }, + "type": "array" + }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array" + }, + "homepage": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "JavaManifest": { + "properties": { + "main": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "namedSections": { + "patternProperties": { + ".*": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + } + }, + "additionalProperties": false, + "type": "object" + }, + "JavaMetadata": { + "required": [ + "virtualPath" + ], + "properties": { + "virtualPath": { + "type": "string" + }, + "manifest": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/JavaManifest" + }, + "pomProperties": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/PomProperties" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Location": { + "required": [ + "path" + ], + "properties": { + "path": { + "type": "string" + }, + "layerID": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "NpmPackageJSONMetadata": { + "required": [ + "author", + "licenses", + "homepage", + "description", + "url" + ], + "properties": { + "files": { + "items": { + "type": "string" + }, + "type": "array" + }, + "author": { + "type": "string" + }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array" + }, + "homepage": { + "type": "string" + }, + "description": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Package": { + "required": [ + "id", + "name", + "version", + "type", + "foundBy", + "locations", + "licenses", + "language", + "cpes", + "purl", + "metadataType", + "metadata" + ], + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "type": { + "type": "string" + }, + "foundBy": { + "type": "string" + }, + "locations": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/Location" + }, + "type": "array" + }, + "licenses": { + "items": { + "type": "string" + }, + "type": "array" + }, + "language": { + "type": "string" + }, + "cpes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "purl": { + "type": "string" + }, + "metadataType": { + "type": "string" + }, + "metadata": { + "anyOf": [ + { + "type": "null" + }, + { + "$ref": "#/definitions/ApkMetadata" + }, + { + "$ref": "#/definitions/DpkgMetadata" + }, + { + "$ref": "#/definitions/GemMetadata" + }, + { + "$ref": "#/definitions/JavaMetadata" + }, + { + "$ref": "#/definitions/NpmPackageJSONMetadata" + }, + { + "$ref": "#/definitions/PythonPackageMetadata" + }, + { + "$ref": "#/definitions/RpmdbMetadata" + } + ] + } + }, + "additionalProperties": false, + "type": "object" + }, + "PomProperties": { + "required": [ + "path", + "name", + "groupId", + "artifactId", + "version", + "extraFields" + ], + "properties": { + "path": { + "type": "string" + }, + "name": { + "type": "string" + }, + "groupId": { + "type": "string" + }, + "artifactId": { + "type": "string" + }, + "version": { + "type": "string" + }, + "extraFields": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "additionalProperties": false, + "type": "object" + }, + "PythonFileDigest": { + "required": [ + "algorithm", + "value" + ], + "properties": { + "algorithm": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "PythonFileRecord": { + "required": [ + "path" + ], + "properties": { + "path": { + "type": "string" + }, + "digest": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/PythonFileDigest" + }, + "size": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "PythonPackageMetadata": { + "required": [ + "name", + "version", + "license", + "author", + "authorEmail", + "platform", + "sitePackagesRootPath" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "license": { + "type": "string" + }, + "author": { + "type": "string" + }, + "authorEmail": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "files": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/PythonFileRecord" + }, + "type": "array" + }, + "sitePackagesRootPath": { + "type": "string" + }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Relationship": { + "required": [ + "parent", + "child", + "type", + "metadata" + ], + "properties": { + "parent": { + "type": "string" + }, + "child": { + "type": "string" + }, + "type": { + "type": "string" + }, + "metadata": { + "additionalProperties": true + } + }, + "additionalProperties": false, + "type": "object" + }, + "RpmdbFileRecord": { + "required": [ + "path", + "mode", + "size", + "sha256" + ], + "properties": { + "path": { + "type": "string" + }, + "mode": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "sha256": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "RpmdbMetadata": { + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "license", + "vendor", + "files" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "epoch": { + "type": "integer" + }, + "architecture": { + "type": "string" + }, + "release": { + "type": "string" + }, + "sourceRpm": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "license": { + "type": "string" + }, + "vendor": { + "type": "string" + }, + "files": { + "items": { + "$schema": "http://json-schema.org/draft-04/schema#", + "$ref": "#/definitions/RpmdbFileRecord" + }, + "type": "array" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Schema": { + "required": [ + "version", + "url" + ], + "properties": { + "version": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "type": "object" + }, + "Source": { + "required": [ + "type", + "target" + ], + "properties": { + "type": { + "type": "string" + }, + "target": { + "additionalProperties": true + } + }, + "additionalProperties": false, + "type": "object" + } + } +} diff --git a/syft/cataloger/apkdb/cataloger.go b/syft/cataloger/apkdb/cataloger.go index 4511e9d57..51ab165f7 100644 --- a/syft/cataloger/apkdb/cataloger.go +++ b/syft/cataloger/apkdb/cataloger.go @@ -5,12 +5,13 @@ package apkdb import ( "github.com/anchore/syft/syft/cataloger/common" + "github.com/anchore/syft/syft/pkg" ) // NewApkdbCataloger returns a new Alpine DB cataloger object. func NewApkdbCataloger() *common.GenericCataloger { globParsers := map[string]common.ParserFn{ - "**/lib/apk/db/installed": parseApkDB, + pkg.ApkDbGlob: parseApkDB, } return common.NewGenericCataloger(nil, globParsers, "apkdb-cataloger") diff --git a/syft/cataloger/catalog.go b/syft/cataloger/catalog.go index 6829c7182..27c10556d 100644 --- a/syft/cataloger/catalog.go +++ b/syft/cataloger/catalog.go @@ -39,6 +39,7 @@ func newMonitor() (*progress.Manual, *progress.Manual) { // request. func Catalog(resolver source.Resolver, theDistro *distro.Distro, catalogers ...Cataloger) (*pkg.Catalog, error) { catalog := pkg.NewCatalog() + filesProcessed, packagesDiscovered := newMonitor() // perform analysis, accumulating errors for each failed analysis diff --git a/syft/cataloger/deb/cataloger.go b/syft/cataloger/deb/cataloger.go index 8d206a6d3..a8d9422c4 100644 --- a/syft/cataloger/deb/cataloger.go +++ b/syft/cataloger/deb/cataloger.go @@ -14,9 +14,8 @@ import ( ) const ( - dpkgStatusGlob = "**/var/lib/dpkg/status" - md5sumsExt = ".md5sums" - docsPath = "/usr/share/doc" + md5sumsExt = ".md5sums" + docsPath = "/usr/share/doc" ) type Cataloger struct{} @@ -34,7 +33,7 @@ func (c *Cataloger) Name() string { // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files. // nolint:funlen func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { - dbFileMatches, err := resolver.FilesByGlob(dpkgStatusGlob) + dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDbGlob) if err != nil { return nil, fmt.Errorf("failed to find dpkg status files's by glob: %w", err) } diff --git a/syft/cataloger/rpmdb/cataloger.go b/syft/cataloger/rpmdb/cataloger.go index 59bfd53d3..6f7a93288 100644 --- a/syft/cataloger/rpmdb/cataloger.go +++ b/syft/cataloger/rpmdb/cataloger.go @@ -10,10 +10,7 @@ import ( "github.com/anchore/syft/syft/source" ) -const ( - packagesGlob = "**/var/lib/rpm/Packages" - catalogerName = "rpmdb-cataloger" -) +const catalogerName = "rpmdb-cataloger" type Cataloger struct{} @@ -29,7 +26,7 @@ func (c *Cataloger) Name() string { // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing rpm db installation. func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { - fileMatches, err := resolver.FilesByGlob(packagesGlob) + fileMatches, err := resolver.FilesByGlob(pkg.RpmDbGlob) if err != nil { return nil, fmt.Errorf("failed to find rpmdb's by glob: %w", err) } diff --git a/syft/pkg/apk_metadata.go b/syft/pkg/apk_metadata.go index 705528fd0..f8fac514f 100644 --- a/syft/pkg/apk_metadata.go +++ b/syft/pkg/apk_metadata.go @@ -1,9 +1,16 @@ package pkg import ( + "sort" + "github.com/package-url/packageurl-go" + "github.com/scylladb/go-set/strset" ) +const ApkDbGlob = "**/lib/apk/db/installed" + +var _ fileOwner = (*ApkMetadata)(nil) + // ApkMetadata represents all captured data for a Alpine DB package entry. // See the following sources for more information: // - https://wiki.alpinelinux.org/wiki/Apk_spec @@ -53,3 +60,15 @@ func (m ApkMetadata) PackageURL() string { "") return pURL.ToString() } + +func (m ApkMetadata) ownedFiles() (result []string) { + s := strset.New() + for _, f := range m.Files { + if f.Path != "" { + s.Add(f.Path) + } + } + result = s.List() + sort.Strings(result) + return result +} diff --git a/syft/pkg/apk_metadata_test.go b/syft/pkg/apk_metadata_test.go index ddf87cf95..1ff43a12b 100644 --- a/syft/pkg/apk_metadata_test.go +++ b/syft/pkg/apk_metadata_test.go @@ -1,8 +1,11 @@ package pkg import ( - "github.com/sergi/go-diff/diffmatchpatch" + "strings" "testing" + + "github.com/go-test/deep" + "github.com/sergi/go-diff/diffmatchpatch" ) func TestApkMetadata_pURL(t *testing.T) { @@ -31,3 +34,45 @@ func TestApkMetadata_pURL(t *testing.T) { }) } } + +func TestApkMetadata_fileOwner(t *testing.T) { + tests := []struct { + metadata ApkMetadata + expected []string + }{ + { + metadata: ApkMetadata{ + Files: []ApkFileRecord{ + {Path: "/somewhere"}, + {Path: "/else"}, + }, + }, + expected: []string{ + "/else", + "/somewhere", + }, + }, + { + metadata: ApkMetadata{ + Files: []ApkFileRecord{ + {Path: "/somewhere"}, + {Path: ""}, + }, + }, + expected: []string{ + "/somewhere", + }, + }, + } + + for _, test := range tests { + t.Run(strings.Join(test.expected, ","), func(t *testing.T) { + var i interface{} + i = test.metadata + actual := i.(fileOwner).ownedFiles() + for _, d := range deep.Equal(test.expected, actual) { + t.Errorf("diff: %+v", d) + } + }) + } +} diff --git a/syft/pkg/catalog.go b/syft/pkg/catalog.go index 81a1d4652..6325ae1cf 100644 --- a/syft/pkg/catalog.go +++ b/syft/pkg/catalog.go @@ -4,27 +4,29 @@ import ( "sort" "sync" - "github.com/anchore/syft/syft/source" - "github.com/anchore/syft/internal/log" ) -var nextPackageID int64 +var globsForbiddenFromBeingOwned = []string{ + ApkDbGlob, + DpkgDbGlob, + RpmDbGlob, +} // Catalog represents a collection of Packages. type Catalog struct { - byID map[ID]*Package - byType map[Type][]*Package - byFile map[source.Location][]*Package - lock sync.RWMutex + byID map[ID]*Package + idsByType map[Type][]ID + idsByPath map[string][]ID // note: this is real path or virtual path + lock sync.RWMutex } // NewCatalog returns a new empty Catalog func NewCatalog(pkgs ...Package) *Catalog { catalog := Catalog{ - byID: make(map[ID]*Package), - byType: make(map[Type][]*Package), - byFile: make(map[source.Location][]*Package), + byID: make(map[ID]*Package), + idsByType: make(map[Type][]ID), + idsByPath: make(map[string][]ID), } for _, p := range pkgs { @@ -41,52 +43,96 @@ func (c *Catalog) PackageCount() int { // Package returns the package with the given ID. func (c *Catalog) Package(id ID) *Package { - return c.byID[id] + v, exists := c.byID[id] + if !exists { + return nil + } + return v } -// PackagesByFile returns all packages that were discovered from the given source file reference. -func (c *Catalog) PackagesByFile(location source.Location) []*Package { - return c.byFile[location] +// PackagesByPath returns all packages that were discovered from the given path. +func (c *Catalog) PackagesByPath(path string) []*Package { + return c.Packages(c.idsByPath[path]) +} + +// Packages returns all packages for the given ID. +func (c *Catalog) Packages(ids []ID) (result []*Package) { + for _, i := range ids { + p, exists := c.byID[i] + if exists { + result = append(result, p) + } + } + return result } // Add a package to the Catalog. func (c *Catalog) Add(p Package) { - if p.id != 0 { - log.Errorf("package already added to catalog: %s", p) - return - } c.lock.Lock() defer c.lock.Unlock() - p.id = ID(nextPackageID) - nextPackageID++ + _, exists := c.byID[p.ID] + if exists { + log.Errorf("package ID already exists in the catalog : id=%+v %+v", p.ID, p) + return + } + + if p.ID == "" { + p.ID = newID() + } // store by package ID - c.byID[p.id] = &p + c.byID[p.ID] = &p // store by package type - _, ok := c.byType[p.Type] - if !ok { - c.byType[p.Type] = make([]*Package, 0) - } - c.byType[p.Type] = append(c.byType[p.Type], &p) + c.idsByType[p.Type] = append(c.idsByType[p.Type], p.ID) - // store by file references - for _, s := range p.Locations { - _, ok := c.byFile[s] - if !ok { - c.byFile[s] = make([]*Package, 0) + // store by file location paths + for _, l := range p.Locations { + if l.RealPath != "" { + c.idsByPath[l.RealPath] = append(c.idsByPath[l.RealPath], p.ID) + } + if l.VirtualPath != "" { + c.idsByPath[l.VirtualPath] = append(c.idsByPath[l.VirtualPath], p.ID) } - c.byFile[s] = append(c.byFile[s], &p) } } +func (c *Catalog) Remove(id ID) { + c.lock.Lock() + defer c.lock.Unlock() + + _, exists := c.byID[id] + if !exists { + log.Errorf("package ID does not exist in the catalog : id=%+v", id) + return + } + + // Remove all index references to this package ID + for t, ids := range c.idsByType { + c.idsByType[t] = removeID(id, ids) + if len(c.idsByType[t]) == 0 { + delete(c.idsByType, t) + } + } + + for p, ids := range c.idsByPath { + c.idsByPath[p] = removeID(id, ids) + if len(c.idsByPath[p]) == 0 { + delete(c.idsByPath, p) + } + } + + // Remove package + delete(c.byID, id) +} + // Enumerate all packages for the given type(s), enumerating all packages if no type is specified. func (c *Catalog) Enumerate(types ...Type) <-chan *Package { channel := make(chan *Package) go func() { defer close(channel) - for ty, packages := range c.byType { + for ty, ids := range c.idsByType { if len(types) != 0 { found := false typeCheck: @@ -100,8 +146,8 @@ func (c *Catalog) Enumerate(types ...Type) <-chan *Package { continue } } - for _, p := range packages { - channel <- p + for _, id := range ids { + channel <- c.Package(id) } } }() @@ -128,3 +174,12 @@ func (c *Catalog) Sorted(types ...Type) []*Package { return pkgs } + +func removeID(id ID, target []ID) (result []ID) { + for _, value := range target { + if value != id { + result = append(result, value) + } + } + return result +} diff --git a/syft/pkg/catalog_test.go b/syft/pkg/catalog_test.go new file mode 100644 index 000000000..bb02bbc8a --- /dev/null +++ b/syft/pkg/catalog_test.go @@ -0,0 +1,157 @@ +package pkg + +import ( + "testing" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/syft/syft/source" +) + +var catalogAddAndRemoveTestPkgs = []Package{ + { + ID: "my-id", + Locations: []source.Location{ + { + RealPath: "/a/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/b/path", + VirtualPath: "/bee/path", + }, + }, + Type: RpmPkg, + }, + { + ID: "my-other-id", + Locations: []source.Location{ + { + RealPath: "/c/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/d/path", + VirtualPath: "/another/path", + }, + }, + Type: NpmPkg, + }, +} + +type expectedIndexes struct { + byType map[Type]*strset.Set + byPath map[string]*strset.Set +} + +func TestCatalogAddPopulatesIndex(t *testing.T) { + tests := []struct { + name string + pkgs []Package + expectedIndexes expectedIndexes + }{ + { + name: "vanilla-add", + pkgs: catalogAddAndRemoveTestPkgs, + expectedIndexes: expectedIndexes{ + byType: map[Type]*strset.Set{ + RpmPkg: strset.New("my-id"), + NpmPkg: strset.New("my-other-id"), + }, + byPath: map[string]*strset.Set{ + "/another/path": strset.New("my-id", "my-other-id"), + "/a/path": strset.New("my-id"), + "/b/path": strset.New("my-id"), + "/bee/path": strset.New("my-id"), + "/c/path": strset.New("my-other-id"), + "/d/path": strset.New("my-other-id"), + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := NewCatalog(test.pkgs...) + + assertIndexes(t, c, test.expectedIndexes) + + }) + } +} + +func TestCatalogRemove(t *testing.T) { + tests := []struct { + name string + pkgs []Package + removeId ID + expectedIndexes expectedIndexes + }{ + { + name: "vanilla-add", + removeId: "my-other-id", + pkgs: catalogAddAndRemoveTestPkgs, + expectedIndexes: expectedIndexes{ + byType: map[Type]*strset.Set{ + RpmPkg: strset.New("my-id"), + }, + byPath: map[string]*strset.Set{ + "/another/path": strset.New("my-id"), + "/a/path": strset.New("my-id"), + "/b/path": strset.New("my-id"), + "/bee/path": strset.New("my-id"), + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := NewCatalog(test.pkgs...) + c.Remove(test.removeId) + + assertIndexes(t, c, test.expectedIndexes) + + if c.Package(test.removeId) != nil { + t.Errorf("expected package to be removed, but was found!") + } + + if c.PackageCount() != len(test.pkgs)-1 { + t.Errorf("expected count to be affected but was not") + } + + }) + } +} + +func assertIndexes(t *testing.T, c *Catalog, expectedIndexes expectedIndexes) { + // assert path index + if len(c.idsByPath) != len(expectedIndexes.byPath) { + t.Errorf("unexpected path index length: %d != %d", len(c.idsByPath), len(expectedIndexes.byPath)) + } + for path, expectedIds := range expectedIndexes.byPath { + actualIds := strset.New() + for _, p := range c.PackagesByPath(path) { + actualIds.Add(string(p.ID)) + } + + if !expectedIds.IsEqual(actualIds) { + t.Errorf("mismatched IDs for path=%q : %+v", path, strset.SymmetricDifference(actualIds, expectedIds)) + } + } + + // assert type index + if len(c.idsByType) != len(expectedIndexes.byType) { + t.Errorf("unexpected type index length: %d != %d", len(c.idsByType), len(expectedIndexes.byType)) + } + for ty, expectedIds := range expectedIndexes.byType { + actualIds := strset.New() + for p := range c.Enumerate(ty) { + actualIds.Add(string(p.ID)) + } + + if !expectedIds.IsEqual(actualIds) { + t.Errorf("mismatched IDs for type=%q : %+v", ty, strset.SymmetricDifference(actualIds, expectedIds)) + } + } +} diff --git a/syft/pkg/dpkg_metadata.go b/syft/pkg/dpkg_metadata.go index d82197029..3f91c4879 100644 --- a/syft/pkg/dpkg_metadata.go +++ b/syft/pkg/dpkg_metadata.go @@ -1,10 +1,17 @@ package pkg import ( + "sort" + "github.com/anchore/syft/syft/distro" "github.com/package-url/packageurl-go" + "github.com/scylladb/go-set/strset" ) +const DpkgDbGlob = "**/var/lib/dpkg/status" + +var _ fileOwner = (*DpkgMetadata)(nil) + // DpkgMetadata represents all captured data for a Debian package DB entry; available fields are described // at http://manpages.ubuntu.com/manpages/xenial/man1/dpkg-query.1.html in the --showformat section. type DpkgMetadata struct { @@ -44,3 +51,15 @@ func (m DpkgMetadata) PackageURL(d *distro.Distro) string { "") return pURL.ToString() } + +func (m DpkgMetadata) ownedFiles() (result []string) { + s := strset.New() + for _, f := range m.Files { + if f.Path != "" { + s.Add(f.Path) + } + } + result = s.List() + sort.Strings(result) + return +} diff --git a/syft/pkg/dpkg_metadata_test.go b/syft/pkg/dpkg_metadata_test.go index db791db81..81a0cb665 100644 --- a/syft/pkg/dpkg_metadata_test.go +++ b/syft/pkg/dpkg_metadata_test.go @@ -1,8 +1,11 @@ package pkg import ( + "strings" "testing" + "github.com/go-test/deep" + "github.com/anchore/syft/syft/distro" "github.com/sergi/go-diff/diffmatchpatch" ) @@ -50,3 +53,45 @@ func TestDpkgMetadata_pURL(t *testing.T) { }) } } + +func TestDpkgMetadata_fileOwner(t *testing.T) { + tests := []struct { + metadata DpkgMetadata + expected []string + }{ + { + metadata: DpkgMetadata{ + Files: []DpkgFileRecord{ + {Path: "/somewhere"}, + {Path: "/else"}, + }, + }, + expected: []string{ + "/else", + "/somewhere", + }, + }, + { + metadata: DpkgMetadata{ + Files: []DpkgFileRecord{ + {Path: "/somewhere"}, + {Path: ""}, + }, + }, + expected: []string{ + "/somewhere", + }, + }, + } + + for _, test := range tests { + t.Run(strings.Join(test.expected, ","), func(t *testing.T) { + var i interface{} + i = test.metadata + actual := i.(fileOwner).ownedFiles() + for _, d := range deep.Equal(test.expected, actual) { + t.Errorf("diff: %+v", d) + } + }) + } +} diff --git a/syft/pkg/file_owner.go b/syft/pkg/file_owner.go new file mode 100644 index 000000000..24520c304 --- /dev/null +++ b/syft/pkg/file_owner.go @@ -0,0 +1,5 @@ +package pkg + +type fileOwner interface { + ownedFiles() []string +} diff --git a/syft/pkg/id.go b/syft/pkg/id.go new file mode 100644 index 000000000..d152209d0 --- /dev/null +++ b/syft/pkg/id.go @@ -0,0 +1,12 @@ +package pkg + +import ( + "github.com/google/uuid" +) + +// ID represents a unique value for each package added to a package catalog. +type ID string + +func newID() ID { + return ID(uuid.New().String()) +} diff --git a/syft/pkg/ownership_by_files_relationship.go b/syft/pkg/ownership_by_files_relationship.go new file mode 100644 index 000000000..23213604d --- /dev/null +++ b/syft/pkg/ownership_by_files_relationship.go @@ -0,0 +1,86 @@ +package pkg + +import ( + "github.com/anchore/syft/internal/log" + "github.com/bmatcuk/doublestar/v2" + "github.com/scylladb/go-set/strset" +) + +type ownershipByFilesMetadata struct { + Files []string `json:"files"` +} + +func ownershipByFilesRelationships(catalog *Catalog) []Relationship { + var relationships = findOwnershipByFilesRelationships(catalog) + + var edges []Relationship + for parent, children := range relationships { + for child, files := range children { + edges = append(edges, Relationship{ + Parent: parent, + Child: child, + Type: OwnershipByFileOverlapRelationship, + Metadata: ownershipByFilesMetadata{ + Files: files.List(), + }, + }) + } + } + + return edges +} + +// findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of +// a package is found to be owned by another (from the owner's .Metadata.Files[]). +func findOwnershipByFilesRelationships(catalog *Catalog) map[ID]map[ID]*strset.Set { + var relationships = make(map[ID]map[ID]*strset.Set) + + for _, candidateOwnerPkg := range catalog.Sorted() { + if candidateOwnerPkg.Metadata == nil { + continue + } + + // check to see if this is a file owner + pkgFileOwner, ok := candidateOwnerPkg.Metadata.(fileOwner) + if !ok { + continue + } + for _, ownedFilePath := range pkgFileOwner.ownedFiles() { + if matchesAny(ownedFilePath, globsForbiddenFromBeingOwned) { + // we skip over known exceptions to file ownership, such as the RPM package owning + // the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended + continue + } + + // look for package(s) in the catalog that may be owned by this package and mark the relationship + for _, subPackage := range catalog.PackagesByPath(ownedFilePath) { + if subPackage.ID == candidateOwnerPkg.ID { + continue + } + if _, exists := relationships[candidateOwnerPkg.ID]; !exists { + relationships[candidateOwnerPkg.ID] = make(map[ID]*strset.Set) + } + + if _, exists := relationships[candidateOwnerPkg.ID][subPackage.ID]; !exists { + relationships[candidateOwnerPkg.ID][subPackage.ID] = strset.New() + } + relationships[candidateOwnerPkg.ID][subPackage.ID].Add(ownedFilePath) + } + } + } + + return relationships +} + +func matchesAny(s string, globs []string) bool { + for _, g := range globs { + matches, err := doublestar.Match(g, s) + if err != nil { + log.Errorf("failed to match glob=%q : %+v", g, err) + } + if matches { + return true + } + } + return false +} diff --git a/syft/pkg/ownership_by_files_relationship_test.go b/syft/pkg/ownership_by_files_relationship_test.go new file mode 100644 index 000000000..3e8bf4597 --- /dev/null +++ b/syft/pkg/ownership_by_files_relationship_test.go @@ -0,0 +1,175 @@ +package pkg + +import ( + "testing" + + "github.com/anchore/syft/syft/source" + "github.com/go-test/deep" +) + +func TestOwnershipByFilesRelationship(t *testing.T) { + tests := []struct { + name string + pkgs []Package + expectedRelations []Relationship + }{ + { + name: "owns-by-real-path", + pkgs: []Package{ + { + ID: "parent", + Locations: []source.Location{ + { + RealPath: "/a/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/b/path", + VirtualPath: "/bee/path", + }, + }, + Type: RpmPkg, + MetadataType: RpmdbMetadataType, + Metadata: RpmdbMetadata{ + Files: []RpmdbFileRecord{ + {Path: "/owning/path/1"}, + {Path: "/owning/path/2"}, + {Path: "/d/path"}, + }, + }, + }, + { + ID: "child", + Locations: []source.Location{ + { + RealPath: "/c/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/d/path", + VirtualPath: "/another/path", + }, + }, + Type: NpmPkg, + }, + }, + expectedRelations: []Relationship{ + { + Parent: "parent", + Child: "child", + Type: OwnershipByFileOverlapRelationship, + Metadata: ownershipByFilesMetadata{ + Files: []string{ + "/d/path", + }, + }, + }, + }, + }, + { + name: "owns-by-virtual-path", + pkgs: []Package{ + { + ID: "parent", + Locations: []source.Location{ + { + RealPath: "/a/path", + VirtualPath: "/some/other/path", + }, + { + RealPath: "/b/path", + VirtualPath: "/bee/path", + }, + }, + Type: RpmPkg, + MetadataType: RpmdbMetadataType, + Metadata: RpmdbMetadata{ + Files: []RpmdbFileRecord{ + {Path: "/owning/path/1"}, + {Path: "/owning/path/2"}, + {Path: "/another/path"}, + }, + }, + }, + { + ID: "child", + Locations: []source.Location{ + { + RealPath: "/c/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/d/path", + VirtualPath: "", + }, + }, + Type: NpmPkg, + }, + }, + expectedRelations: []Relationship{ + { + Parent: "parent", + Child: "child", + Type: OwnershipByFileOverlapRelationship, + Metadata: ownershipByFilesMetadata{ + Files: []string{ + "/another/path", + }, + }, + }, + }, + }, + { + name: "ignore-empty-path", + pkgs: []Package{ + { + ID: "parent", + Locations: []source.Location{ + { + RealPath: "/a/path", + VirtualPath: "/some/other/path", + }, + { + RealPath: "/b/path", + VirtualPath: "/bee/path", + }, + }, + Type: RpmPkg, + MetadataType: RpmdbMetadataType, + Metadata: RpmdbMetadata{ + Files: []RpmdbFileRecord{ + {Path: "/owning/path/1"}, + {Path: "/owning/path/2"}, + {Path: ""}, + }, + }, + }, + { + ID: "child", + Locations: []source.Location{ + { + RealPath: "/c/path", + VirtualPath: "/another/path", + }, + { + RealPath: "/d/path", + VirtualPath: "", + }, + }, + Type: NpmPkg, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + c := NewCatalog(test.pkgs...) + relationships := ownershipByFilesRelationships(c) + + for _, d := range deep.Equal(test.expectedRelations, relationships) { + t.Errorf("diff: %+v", d) + } + }) + } +} diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 6475d15f5..8ae228a21 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -9,12 +9,9 @@ import ( "github.com/anchore/syft/syft/source" ) -// ID represents a unique value for each package added to a package catalog. -type ID int64 - // Package represents an application or library that has been bundled into a distributable format. type Package struct { - id ID // uniquely identifies a package, set by the cataloger + ID ID // uniquely identifies a package, set by the cataloger Name string // the package name Version string // the version of the package FoundBy string // the specific cataloger that discovered this package @@ -29,11 +26,6 @@ type Package struct { Metadata interface{} // additional data found while parsing the package source } -// ID returns the package ID, which is unique relative to a package catalog. -func (p Package) ID() ID { - return p.id -} - // Stringer to represent a package. func (p Package) String() string { return fmt.Sprintf("Pkg(type=%s, name=%s, version=%s)", p.Type, p.Name, p.Version) diff --git a/syft/pkg/python_package_metadata.go b/syft/pkg/python_package_metadata.go index e01771602..c1e752d21 100644 --- a/syft/pkg/python_package_metadata.go +++ b/syft/pkg/python_package_metadata.go @@ -1,5 +1,13 @@ package pkg +import ( + "sort" + + "github.com/scylladb/go-set/strset" +) + +var _ fileOwner = (*PythonPackageMetadata)(nil) + // PythonFileDigest represents the file metadata for a single file attributed to a python package. type PythonFileDigest struct { Algorithm string `json:"algorithm"` @@ -25,3 +33,15 @@ type PythonPackageMetadata struct { SitePackagesRootPath string `json:"sitePackagesRootPath"` TopLevelPackages []string `json:"topLevelPackages,omitempty"` } + +func (m PythonPackageMetadata) ownedFiles() (result []string) { + s := strset.New() + for _, f := range m.Files { + if f.Path != "" { + s.Add(f.Path) + } + } + result = s.List() + sort.Strings(result) + return result +} diff --git a/syft/pkg/python_package_metadata_test.go b/syft/pkg/python_package_metadata_test.go new file mode 100644 index 000000000..c2cd37817 --- /dev/null +++ b/syft/pkg/python_package_metadata_test.go @@ -0,0 +1,50 @@ +package pkg + +import ( + "strings" + "testing" + + "github.com/go-test/deep" +) + +func TestPythonMetadata_fileOwner(t *testing.T) { + tests := []struct { + metadata PythonPackageMetadata + expected []string + }{ + { + metadata: PythonPackageMetadata{ + Files: []PythonFileRecord{ + {Path: "/somewhere"}, + {Path: "/else"}, + }, + }, + expected: []string{ + "/else", + "/somewhere", + }, + }, + { + metadata: PythonPackageMetadata{ + Files: []PythonFileRecord{ + {Path: "/somewhere"}, + {Path: ""}, + }, + }, + expected: []string{ + "/somewhere", + }, + }, + } + + for _, test := range tests { + t.Run(strings.Join(test.expected, ","), func(t *testing.T) { + var i interface{} + i = test.metadata + actual := i.(fileOwner).ownedFiles() + for _, d := range deep.Equal(test.expected, actual) { + t.Errorf("diff: %+v", d) + } + }) + } +} diff --git a/syft/pkg/relationship.go b/syft/pkg/relationship.go new file mode 100644 index 000000000..09271e564 --- /dev/null +++ b/syft/pkg/relationship.go @@ -0,0 +1,20 @@ +package pkg + +const ( + // OwnershipByFileOverlapRelationship indicates that the parent package owns the child package made evident by the set of provided files + OwnershipByFileOverlapRelationship RelationshipType = "ownership-by-file-overlap" +) + +type RelationshipType string + +type Relationship struct { + Parent ID + Child ID + Type RelationshipType + Metadata interface{} +} + +// TODO: as more relationships are added, this function signature will probably accommodate selection +func NewRelationships(catalog *Catalog) []Relationship { + return ownershipByFilesRelationships(catalog) +} diff --git a/syft/pkg/rpmdb_metadata.go b/syft/pkg/rpmdb_metadata.go index 4cd6f3805..24cef2fc2 100644 --- a/syft/pkg/rpmdb_metadata.go +++ b/syft/pkg/rpmdb_metadata.go @@ -2,11 +2,18 @@ package pkg import ( "fmt" + "sort" + + "github.com/scylladb/go-set/strset" "github.com/anchore/syft/syft/distro" "github.com/package-url/packageurl-go" ) +const RpmDbGlob = "**/var/lib/rpm/Packages" + +var _ fileOwner = (*RpmdbMetadata)(nil) + // RpmdbMetadata represents all captured data for a RPM DB package entry. type RpmdbMetadata struct { Name string `json:"name"` @@ -52,3 +59,15 @@ func (m RpmdbMetadata) PackageURL(d *distro.Distro) string { "") return pURL.ToString() } + +func (m RpmdbMetadata) ownedFiles() (result []string) { + s := strset.New() + for _, f := range m.Files { + if f.Path != "" { + s.Add(f.Path) + } + } + result = s.List() + sort.Strings(result) + return result +} diff --git a/syft/pkg/rpmdb_metadata_test.go b/syft/pkg/rpmdb_metadata_test.go index d80ac3e15..dcdddedc3 100644 --- a/syft/pkg/rpmdb_metadata_test.go +++ b/syft/pkg/rpmdb_metadata_test.go @@ -1,8 +1,11 @@ package pkg import ( + "strings" "testing" + "github.com/go-test/deep" + "github.com/anchore/syft/syft/distro" "github.com/sergi/go-diff/diffmatchpatch" ) @@ -52,3 +55,45 @@ func TestRpmMetadata_pURL(t *testing.T) { }) } } + +func TestRpmMetadata_fileOwner(t *testing.T) { + tests := []struct { + metadata RpmdbMetadata + expected []string + }{ + { + metadata: RpmdbMetadata{ + Files: []RpmdbFileRecord{ + {Path: "/somewhere"}, + {Path: "/else"}, + }, + }, + expected: []string{ + "/else", + "/somewhere", + }, + }, + { + metadata: RpmdbMetadata{ + Files: []RpmdbFileRecord{ + {Path: "/somewhere"}, + {Path: ""}, + }, + }, + expected: []string{ + "/somewhere", + }, + }, + } + + for _, test := range tests { + t.Run(strings.Join(test.expected, ","), func(t *testing.T) { + var i interface{} + i = test.metadata + actual := i.(fileOwner).ownedFiles() + for _, d := range deep.Equal(test.expected, actual) { + t.Errorf("diff: %+v", d) + } + }) + } +} diff --git a/syft/presenter/json/document.go b/syft/presenter/json/document.go index 772d0ac58..5f04cdfcf 100644 --- a/syft/presenter/json/document.go +++ b/syft/presenter/json/document.go @@ -12,11 +12,12 @@ import ( // Document represents the syft cataloging findings as a JSON document type Document struct { - Artifacts []Package `json:"artifacts"` // Artifacts is the list of packages discovered and placed into the catalog - Source Source `json:"source"` // Source represents the original object that was cataloged - Distro Distribution `json:"distro"` // Distro represents the Linux distribution that was detected from the source - Descriptor Descriptor `json:"descriptor"` // Descriptor is a block containing self-describing information about syft - Schema Schema `json:"schema"` // Schema is a block reserved for defining the version for the shape of this JSON document and where to find the schema document to validate the shape + Artifacts []Package `json:"artifacts"` // Artifacts is the list of packages discovered and placed into the catalog + Source Source `json:"source"` // Source represents the original object that was cataloged + Distro Distribution `json:"distro"` // Distro represents the Linux distribution that was detected from the source + Descriptor Descriptor `json:"descriptor"` // Descriptor is a block containing self-describing information about syft + Schema Schema `json:"schema"` // Schema is a block reserved for defining the version for the shape of this JSON document and where to find the schema document to validate the shape + ArtifactRelationships []Relationship `json:"artifactRelationships"` } // NewDocument creates and populates a new JSON document struct from the given cataloging results. @@ -38,6 +39,7 @@ func NewDocument(catalog *pkg.Catalog, srcMetadata source.Metadata, d *distro.Di Version: internal.JSONSchemaVersion, URL: fmt.Sprintf("https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-%s.json", internal.JSONSchemaVersion), }, + ArtifactRelationships: newRelationships(pkg.NewRelationships(catalog)), } for _, p := range catalog.Sorted() { diff --git a/syft/presenter/json/package.go b/syft/presenter/json/package.go index c6d9cbdd2..e0e995a6a 100644 --- a/syft/presenter/json/package.go +++ b/syft/presenter/json/package.go @@ -16,6 +16,7 @@ type Package struct { // packageBasicMetadata contains non-ambiguous values (type-wise) from pkg.Package. type packageBasicMetadata struct { + ID string `json:"id"` Name string `json:"name"` Version string `json:"version"` Type string `json:"type"` @@ -60,6 +61,7 @@ func NewPackage(p *pkg.Package) (Package, error) { return Package{ packageBasicMetadata: packageBasicMetadata{ + ID: string(p.ID), Name: p.Name, Version: p.Version, Type: string(p.Type), @@ -89,6 +91,7 @@ func (a Package) ToPackage() (pkg.Package, error) { } return pkg.Package{ // does not include found-by and locations + ID: pkg.ID(a.ID), Name: a.Name, Version: a.Version, FoundBy: a.FoundBy, diff --git a/syft/presenter/json/presenter_test.go b/syft/presenter/json/presenter_test.go index 6241aac4a..f76215c93 100644 --- a/syft/presenter/json/presenter_test.go +++ b/syft/presenter/json/presenter_test.go @@ -31,6 +31,7 @@ func TestJsonDirsPresenter(t *testing.T) { // populate catalog with test data catalog.Add(pkg.Package{ + ID: "package-1-id", Name: "package-1", Version: "1.0.1", Type: pkg.PythonPkg, @@ -51,6 +52,7 @@ func TestJsonDirsPresenter(t *testing.T) { }, }) catalog.Add(pkg.Package{ + ID: "package-2-id", Name: "package-2", Version: "2.0.1", Type: pkg.DebPkg, @@ -113,6 +115,7 @@ func TestJsonImgsPresenter(t *testing.T) { // populate catalog with test data catalog.Add(pkg.Package{ + ID: "package-1-id", Name: "package-1", Version: "1.0.1", Locations: []source.Location{ @@ -133,6 +136,7 @@ func TestJsonImgsPresenter(t *testing.T) { }, }) catalog.Add(pkg.Package{ + ID: "package-2-id", Name: "package-2", Version: "2.0.1", Locations: []source.Location{ diff --git a/syft/presenter/json/relationship.go b/syft/presenter/json/relationship.go new file mode 100644 index 000000000..25eee2a3c --- /dev/null +++ b/syft/presenter/json/relationship.go @@ -0,0 +1,23 @@ +package json + +import "github.com/anchore/syft/syft/pkg" + +type Relationship struct { + Parent string `json:"parent"` + Child string `json:"child"` + Type string `json:"type"` + Metadata interface{} `json:"metadata"` +} + +func newRelationships(relationships []pkg.Relationship) []Relationship { + result := make([]Relationship, len(relationships)) + for i, r := range relationships { + result[i] = Relationship{ + Parent: string(r.Parent), + Child: string(r.Child), + Type: string(r.Type), + Metadata: r.Metadata, + } + } + return result +} diff --git a/syft/presenter/json/test-fixtures/snapshot/TestJsonDirsPresenter.golden b/syft/presenter/json/test-fixtures/snapshot/TestJsonDirsPresenter.golden index 917b53c8a..e4f7b6daf 100644 --- a/syft/presenter/json/test-fixtures/snapshot/TestJsonDirsPresenter.golden +++ b/syft/presenter/json/test-fixtures/snapshot/TestJsonDirsPresenter.golden @@ -1,6 +1,7 @@ { "artifacts": [ { + "id": "package-1-id", "name": "package-1", "version": "1.0.1", "type": "python", @@ -30,6 +31,7 @@ } }, { + "id": "package-2-id", "name": "package-2", "version": "2.0.1", "type": "deb", @@ -72,7 +74,8 @@ "version": "[not provided]" }, "schema": { - "version": "1.0.1", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-1.0.1.json" - } + "version": "1.0.2", + "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-1.0.2.json" + }, + "artifactRelationships": [] } diff --git a/syft/presenter/json/test-fixtures/snapshot/TestJsonImgsPresenter.golden b/syft/presenter/json/test-fixtures/snapshot/TestJsonImgsPresenter.golden index 4d0a023b7..b377cd718 100644 --- a/syft/presenter/json/test-fixtures/snapshot/TestJsonImgsPresenter.golden +++ b/syft/presenter/json/test-fixtures/snapshot/TestJsonImgsPresenter.golden @@ -1,6 +1,7 @@ { "artifacts": [ { + "id": "package-1-id", "name": "package-1", "version": "1.0.1", "type": "python", @@ -31,6 +32,7 @@ } }, { + "id": "package-2-id", "name": "package-2", "version": "2.0.1", "type": "deb", @@ -103,7 +105,8 @@ "version": "[not provided]" }, "schema": { - "version": "1.0.1", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-1.0.1.json" - } + "version": "1.0.2", + "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-1.0.2.json" + }, + "artifactRelationships": [] } diff --git a/test/integration/document_import_test.go b/test/integration/document_import_test.go index c45004034..cd80d1ca0 100644 --- a/test/integration/document_import_test.go +++ b/test/integration/document_import_test.go @@ -2,6 +2,7 @@ package integration import ( "bytes" + "strings" "testing" "github.com/anchore/stereoscope/pkg/imagetest" @@ -57,8 +58,6 @@ func TestCatalogFromJSON(t *testing.T) { var actualPackages, expectedPackages []*pkg.Package - // TODO: take out pkg.RpmdbMetadataType filter - for _, p := range expectedCatalog.Sorted() { expectedPackages = append(expectedPackages, p) } @@ -89,8 +88,7 @@ func TestCatalogFromJSON(t *testing.T) { for _, d := range deep.Equal(a, e) { // ignore errors for empty collections vs nil for select fields - // TODO: this is brittle, but not dangerously so. We should still find a better way to do this. - if d == "Licenses: [] != " { + if strings.Contains(d, "[] != ") { continue } t.Errorf(" package %d (name=%s) diff: %+v", i, e.Name, d) diff --git a/test/integration/package_ownership_relationship_test.go b/test/integration/package_ownership_relationship_test.go new file mode 100644 index 000000000..490408ccc --- /dev/null +++ b/test/integration/package_ownership_relationship_test.go @@ -0,0 +1,61 @@ +package integration + +import ( + "bytes" + "encoding/json" + "testing" + + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft" + "github.com/anchore/syft/syft/presenter" + jsonPresenter "github.com/anchore/syft/syft/presenter/json" + "github.com/anchore/syft/syft/source" +) + +func TestPackageOwnershipRelationships(t *testing.T) { + + // ensure that the json presenter is applying artifact ownership with an image that has expected ownership relationships + tests := []struct { + fixture string + }{ + { + fixture: "image-owning-package", + }, + } + + for _, test := range tests { + t.Run(test.fixture, func(t *testing.T) { + _, cleanup := imagetest.GetFixtureImage(t, "docker-archive", test.fixture) + tarPath := imagetest.GetFixtureImageTarPath(t, test.fixture) + defer cleanup() + + src, catalog, d, err := syft.Catalog("docker-archive:"+tarPath, source.SquashedScope) + if err != nil { + t.Fatalf("failed to catalog image: %+v", err) + } + + p := presenter.GetPresenter(presenter.JSONPresenter, src.Metadata, catalog, d) + if p == nil { + t.Fatal("unable to get presenter") + } + + output := bytes.NewBufferString("") + err = p.Present(output) + if err != nil { + t.Fatalf("unable to present: %+v", err) + } + + var doc jsonPresenter.Document + decoder := json.NewDecoder(output) + if err := decoder.Decode(&doc); err != nil { + t.Fatalf("unable to decode json doc: %+v", err) + } + + if len(doc.ArtifactRelationships) == 0 { + t.Errorf("expected to find relationships between packages but found none") + } + + }) + } + +} diff --git a/test/integration/test-fixtures/image-owning-package/Dockerfile b/test/integration/test-fixtures/image-owning-package/Dockerfile new file mode 100644 index 000000000..37346c9c7 --- /dev/null +++ b/test/integration/test-fixtures/image-owning-package/Dockerfile @@ -0,0 +1,3 @@ +FROM ubuntu:20.04 +# this covers rpm-python +RUN apt-get update && apt-get install -y python-pil=6.2.1-3 \ No newline at end of file