// MatchCaptureGroups applies regEx to str and returns the text captured by each
// named capture group, keyed by the group name.
//
// The returned map is never nil. It is empty when str does not match regEx.
// A named group that did not participate in a successful match is present
// with an empty string value. Unnamed groups (and the whole-match entry at
// index 0) are not included, since they have no name to be keyed by.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		return results
	}

	for i, name := range regEx.SubexpNames() {
		// SubexpNames reports "" for index 0 (the whole match) and for every
		// unnamed group; storing those would pile them all up under the ""
		// key. The bounds check keeps indexing safe (valid indexes are < len).
		if name == "" || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}

	return results
}
// PackageJSON represents the subset of a JavaScript package.json file that is
// decoded by this cataloger.
type PackageJSON struct {
	Version      string            `json:"version"`
	Latest       []string          `json:"latest"`
	Author       Author            `json:"author"`
	License      string            `json:"license"`
	Name         string            `json:"name"`
	Homepage     string            `json:"homepage"`
	Description  string            `json:"description"`
	Dependencies map[string]string `json:"dependencies"`
	Repository   Repository        `json:"repository"`
}

// Author is the normalized form of the package.json "author" field, which may
// be written either as a single string or as an object.
type Author struct {
	// The tag key must be "mapstructure" for mapstructure.Decode to honor it.
	Name  string `json:"name" mapstructure:"name"`
	Email string `json:"email" mapstructure:"email"`
	URL   string `json:"url" mapstructure:"url"`
}

// Repository is the normalized form of the package.json "repository" field,
// which may be written either as a single URL string or as an object.
type Repository struct {
	Type string `json:"type" mapstructure:"type"`
	URL  string `json:"url" mapstructure:"url"`
}

// match example:      "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)"
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
//
// The group names (name, email, url) are the keys that get decoded into the
// Author struct, so they must stay in sync with its mapstructure tags.
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)

// Exports Author.UnmarshalJSON interface to help normalize the json structure.
+func (a *Author) UnmarshalJSON(b []byte) error { + var authorStr string + var fields map[string]string + var author Author + + if err := json.Unmarshal(b, &authorStr); err != nil { + // string parsing did not work, assume a map was given + // for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors + if err := json.Unmarshal(b, &fields); err != nil { + return fmt.Errorf("unable to parse package.json author: %w", err) + } + } else { + // parse out "name (url)" into an Author struct + fields = internal.MatchCaptureGroups(authorPattern, authorStr) + } + + // translate the map into a structure + if err := mapstructure.Decode(fields, &author); err != nil { + return fmt.Errorf("unable to decode package.json author: %w", err) + } + + *a = author + + return nil +} + +func (a *Author) AuthorString() string { + result := a.Name + if a.Email != "" { + result += fmt.Sprintf(" <%s>", a.Email) + } + if a.URL != "" { + result += fmt.Sprintf(" (%s)", a.URL) + } + return result +} + +func (r *Repository) UnmarshalJSON(b []byte) error { + var repositoryStr string + var fields map[string]string + var repository Repository + + if err := json.Unmarshal(b, &repositoryStr); err != nil { + // string parsing did not work, assume a map was given + // for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors + if err := json.Unmarshal(b, &fields); err != nil { + return fmt.Errorf("unable to parse package.json author: %w", err) + } + // translate the map into a structure + if err := mapstructure.Decode(fields, &repository); err != nil { + return fmt.Errorf("unable to decode package.json author: %w", err) + } + + *r = repository + } else { + r.URL = repositoryStr + } + + return nil } // parsePackageJson parses a package.json and returns the discovered JavaScript packages. 
@@ -45,8 +128,10 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) { Type: pkg.NpmPkg, MetadataType: pkg.NpmPackageJSONMetadataType, Metadata: pkg.NpmPackageJSONMetadata{ - Author: p.Author, + Author: p.Author.AuthorString(), Homepage: p.Homepage, + URL: p.Repository.URL, + Licenses: []string{p.License}, }, }) } diff --git a/syft/cataloger/javascript/parse_package_json_test.go b/syft/cataloger/javascript/parse_package_json_test.go index bfe46f4a0..869d4293f 100644 --- a/syft/cataloger/javascript/parse_package_json_test.go +++ b/syft/cataloger/javascript/parse_package_json_test.go @@ -9,36 +9,85 @@ import ( ) func TestParsePackageJSON(t *testing.T) { - expected := pkg.Package{ - Name: "npm", - Version: "6.14.6", - Type: pkg.NpmPkg, - Licenses: []string{"Artistic-2.0"}, - Language: pkg.JavaScript, - MetadataType: pkg.NpmPackageJSONMetadataType, - Metadata: pkg.NpmPackageJSONMetadata{ - Author: "Isaac Z. Schlueter (http://blog.izs.me)", - Homepage: "https://docs.npmjs.com/", + tests := []struct { + Fixture string + ExpectedPkg pkg.Package + }{ + { + Fixture: "test-fixtures/pkg-json/package.json", + ExpectedPkg: pkg.Package{ + Name: "npm", + Version: "6.14.6", + Type: pkg.NpmPkg, + Licenses: []string{"Artistic-2.0"}, + Language: pkg.JavaScript, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ + Author: "Isaac Z. Schlueter (http://blog.izs.me)", + Homepage: "https://docs.npmjs.com/", + URL: "https://github.com/npm/cli", + Licenses: []string{"Artistic-2.0"}, + }, + }, + }, + { + Fixture: "test-fixtures/pkg-json/package-nested-author.json", + ExpectedPkg: pkg.Package{ + Name: "npm", + Version: "6.14.6", + Type: pkg.NpmPkg, + Licenses: []string{"Artistic-2.0"}, + Language: pkg.JavaScript, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ + Author: "Isaac Z. 
Schlueter (http://blog.izs.me)", + Homepage: "https://docs.npmjs.com/", + URL: "https://github.com/npm/cli", + Licenses: []string{"Artistic-2.0"}, + }, + }, + }, + { + Fixture: "test-fixtures/pkg-json/package-repo-string.json", + ExpectedPkg: pkg.Package{ + Name: "function-bind", + Version: "1.1.1", + Type: pkg.NpmPkg, + Licenses: []string{"MIT"}, + Language: pkg.JavaScript, + MetadataType: pkg.NpmPackageJSONMetadataType, + Metadata: pkg.NpmPackageJSONMetadata{ + Author: "Raynos ", + Homepage: "https://github.com/Raynos/function-bind", + URL: "git://github.com/Raynos/function-bind.git", + Licenses: []string{"MIT"}, + }, + }, }, } - fixture, err := os.Open("test-fixtures/pkg-json/package.json") - if err != nil { - t.Fatalf("failed to open fixture: %+v", err) - } - actual, err := parsePackageJSON(fixture.Name(), fixture) - if err != nil { - t.Fatalf("failed to parse package-lock.json: %+v", err) - } - if len(actual) != 1 { - for _, a := range actual { - t.Log(" ", a) - } - t.Fatalf("unexpected package count: %d!=1", len(actual)) - } + for _, test := range tests { + t.Run(test.Fixture, func(t *testing.T) { + fixture, err := os.Open(test.Fixture) + if err != nil { + t.Fatalf("failed to open fixture: %+v", err) + } - for _, d := range deep.Equal(actual[0], expected) { - t.Errorf("diff: %+v", d) - } + actual, err := parsePackageJSON(fixture.Name(), fixture) + if err != nil { + t.Fatalf("failed to parse package-lock.json: %+v", err) + } + if len(actual) != 1 { + for _, a := range actual { + t.Log(" ", a) + } + t.Fatalf("unexpected package count: %d!=1", len(actual)) + } + for _, d := range deep.Equal(actual[0], test.ExpectedPkg) { + + t.Errorf("diff: %+v", d) + } + }) + } } diff --git a/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json b/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json new file mode 100644 index 000000000..ac2438a11 --- /dev/null +++ 
b/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json @@ -0,0 +1,16 @@ +{ + "version": "6.14.6", + "name": "npm", + "description": "a package manager for JavaScript", + "homepage": "https://docs.npmjs.com/", + "author": { + "name": "Isaac Z. Schlueter", + "email": "i@izs.me", + "url": "http://blog.izs.me" + }, + "repository": { + "type": "git", + "url": "https://github.com/npm/cli" + }, + "license": "Artistic-2.0" +} \ No newline at end of file diff --git a/syft/cataloger/javascript/test-fixtures/pkg-json/package-repo-string.json b/syft/cataloger/javascript/test-fixtures/pkg-json/package-repo-string.json new file mode 100644 index 000000000..d42a73b5c --- /dev/null +++ b/syft/cataloger/javascript/test-fixtures/pkg-json/package-repo-string.json @@ -0,0 +1,66 @@ +{ + "name": "function-bind", + "version": "1.1.1", + "description": "Implementation of Function.prototype.bind", + "keywords": [ + "function", + "bind", + "shim", + "es5" + ], + "author": "Raynos ", + "repository": "git://github.com/Raynos/function-bind.git", + "main": "index", + "homepage": "https://github.com/Raynos/function-bind", + "contributors": [ + { + "name": "Raynos" + }, + { + "name": "Jordan Harband", + "url": "https://github.com/ljharb" + } + ], + "bugs": { + "url": "https://github.com/Raynos/function-bind/issues", + "email": "raynos2@gmail.com" + }, + "dependencies": {}, + "devDependencies": { + "@ljharb/eslint-config": "^12.2.1", + "covert": "^1.1.0", + "eslint": "^4.5.0", + "jscs": "^3.0.7", + "tape": "^4.8.0" + }, + "license": "MIT", + "scripts": { + "pretest": "npm run lint", + "test": "npm run tests-only", + "posttest": "npm run coverage -- --quiet", + "tests-only": "node test", + "coverage": "covert test/*.js", + "lint": "npm run jscs && npm run eslint", + "jscs": "jscs *.js */*.js", + "eslint": "eslint *.js */*.js" + }, + "testling": { + "files": "test/index.js", + "browsers": [ + "ie/8..latest", + "firefox/16..latest", + "firefox/nightly", + "chrome/22..latest", + 
"chrome/canary", + "opera/12..latest", + "opera/next", + "safari/5.1..latest", + "ipad/6.0..latest", + "iphone/6.0..latest", + "android-browser/4.2..latest" + ] + } +,"_resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz" +,"_integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" +,"_from": "function-bind@1.1.1" +} \ No newline at end of file diff --git a/syft/cataloger/ruby/parse_gemspec.go b/syft/cataloger/ruby/parse_gemspec.go index e8f33f56d..239d2a4eb 100644 --- a/syft/cataloger/ruby/parse_gemspec.go +++ b/syft/cataloger/ruby/parse_gemspec.go @@ -8,6 +8,8 @@ import ( "regexp" "strings" + "github.com/anchore/syft/internal" + "github.com/mitchellh/mapstructure" "github.com/anchore/syft/syft/cataloger/common" @@ -76,7 +78,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { } for field, pattern := range patterns { - matchMap := matchCaptureGroups(pattern, sanitizedLine) + matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine) if value := matchMap[field]; value != "" { if postProcessor := postProcessors[field]; postProcessor != nil { fields[field] = postProcessor(value) @@ -122,15 +124,3 @@ func renderUtf8(s string) string { }) return fullReplacement } - -// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. 
// NpmPackageJSONMetadata holds extra information that is used in pkg.Package
// for packages discovered from a package.json file.
type NpmPackageJSONMetadata struct {
	// Files is left out of the JSON output when empty.
	Files []string `mapstructure:"files" json:"files,omitempty"`
	// Author is a single rendered string; the package.json parser fills it
	// from Author.AuthorString().
	Author string `mapstructure:"author" json:"author"`
	// Licenses is filled by the package.json parser with the single
	// "license" value of the file.
	Licenses []string `mapstructure:"licenses" json:"licenses"`
	// Homepage is copied from the "homepage" field of package.json.
	Homepage    string `mapstructure:"homepage" json:"homepage"`
	Description string `mapstructure:"description" json:"description"`
	// URL is filled by the package.json parser with the repository URL.
	URL string `mapstructure:"url" json:"url"`
}