Merge pull request #233 from anchore/javascript_parser_fix_author

Javascript parser fix author
This commit is contained in:
Toure Dunnon 2020-10-26 09:30:11 -04:00 committed by GitHub
commit 076454d7a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 303 additions and 90 deletions

15
internal/parse.go Normal file
View File

@ -0,0 +1,15 @@
package internal
import "regexp"
// MatchCaptureGroups applies regEx to str and returns the values of its named capture groups,
// keyed by group name.
//
// Only the first match in str is considered. Unnamed groups and the whole-match entry are
// left out of the result. A named group that did not participate in the match maps to the
// empty string. When regEx does not match str at all, an empty (non-nil) map is returned.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		return results
	}

	for i, name := range regEx.SubexpNames() {
		// index 0 is the whole match and unnamed groups report an empty name; neither is a
		// "named capture group", so they must not leak into the result under the "" key.
		if i == 0 || name == "" || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}
	return results
}

View File

@ -53,11 +53,6 @@
"type": "integer"
},
"files": {
"anyOf": [
{
"type": "null"
},
{
"items": {
"anyOf": [
{
@ -107,8 +102,6 @@
]
},
"type": "array"
}
]
},
"gitCommitOfApkPort": {
"type": "string"

View File

@ -4,6 +4,11 @@ import (
"encoding/json"
"fmt"
"io"
"regexp"
"github.com/anchore/syft/internal"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg"
@ -16,12 +21,90 @@ var _ common.ParserFn = parsePackageLock
// PackageJSON holds the fields read from a JavaScript package.json document; each field is
// bound to its JSON key through the struct tags below.
// NOTE(review): Author appears twice below (once as string, once as Author). This looks like
// the removed and added lines of a diff shown together — confirm that only the Author-typed
// field exists in the real file.
type PackageJSON struct {
Version string `json:"version"`
Latest []string `json:"latest"`
Author string `json:"author"`
Author Author `json:"author"`
License string `json:"license"`
Name string `json:"name"`
Homepage string `json:"homepage"`
Description string `json:"description"`
Dependencies map[string]string `json:"dependencies"`
Repository Repository `json:"repository"`
}
// Author describes the person listed in the "author" field of a package.json document.
//
// The json tags name the keys of the object form of the field. The mapstructure tags name
// the keys used when the struct is filled from a map[string]string via mapstructure.Decode
// (see Author.UnmarshalJSON); the tag key must be spelled "mapstructure" to be honored.
type Author struct {
	Name  string `json:"name" mapstructure:"name"`
	Email string `json:"email" mapstructure:"email"`
	URL   string `json:"url" mapstructure:"url"`
}
// Repository describes the "repository" field of a package.json document.
type Repository struct {
	// Type is read from the "type" key of the object form.
	Type string `json:"type" mapstructure:"type"`
	// URL is read from the "url" key of the object form.
	URL string `json:"url" mapstructure:"url"`
}
// authorPattern splits the package.json author shorthand `name <email> (url)` into the named
// capture groups "name", "email" and "url". The email and url parts are optional, and the
// whitespace between the parts may be absent or repeated. The name is matched lazily so that
// whitespace surrounding it is not captured.
//
// match example: "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)"
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*?)\s*(<(?P<email>.*)>)?\s*(\((?P<url>.*)\))?\s*$`)
// UnmarshalJSON lets the package.json "author" value be given either as a single
// "name <email> (url)" string or as an object of string fields, and normalizes both
// forms into an Author.
func (a *Author) UnmarshalJSON(b []byte) error {
	var (
		shorthand string
		parts     map[string]string
		decoded   Author
	)

	if strErr := json.Unmarshal(b, &shorthand); strErr == nil {
		// parse out "name <email> (url)" into its named pieces
		parts = internal.MatchCaptureGroups(authorPattern, shorthand)
	} else if mapErr := json.Unmarshal(b, &parts); mapErr != nil {
		// the value was neither a string nor a map of strings
		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
		return fmt.Errorf("unable to parse package.json author: %w", mapErr)
	}

	// translate the collected key/value pairs into the structure
	if err := mapstructure.Decode(parts, &decoded); err != nil {
		return fmt.Errorf("unable to decode package.json author: %w", err)
	}

	*a = decoded
	return nil
}
// AuthorString renders the author as "name <email> (url)", leaving out the email and url
// parts when the corresponding field is empty.
func (a *Author) AuthorString() string {
	var email, link string
	if a.Email != "" {
		email = fmt.Sprintf(" <%s>", a.Email)
	}
	if a.URL != "" {
		link = fmt.Sprintf(" (%s)", a.URL)
	}
	return a.Name + email + link
}
// UnmarshalJSON lets the package.json "repository" value be given either as a plain string
// (taken as the URL) or as an object of string fields such as "type" and "url".
// In both cases the receiver is replaced as a whole.
func (r *Repository) UnmarshalJSON(b []byte) error {
	var repositoryStr string
	if err := json.Unmarshal(b, &repositoryStr); err == nil {
		// shorthand form: the whole value is the URL
		*r = Repository{URL: repositoryStr}
		return nil
	}

	// string parsing did not work, assume a map was given
	// for more information: https://docs.npmjs.com/files/package.json#repository
	var fields map[string]string
	if err := json.Unmarshal(b, &fields); err != nil {
		return fmt.Errorf("unable to parse package.json repository: %w", err)
	}

	// translate the map into a structure
	var repository Repository
	if err := mapstructure.Decode(fields, &repository); err != nil {
		return fmt.Errorf("unable to decode package.json repository: %w", err)
	}

	*r = repository
	return nil
}
// parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
@ -45,8 +128,10 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Author: p.Author,
Author: p.Author.AuthorString(),
Homepage: p.Homepage,
URL: p.Repository.URL,
Licenses: []string{p.License},
},
})
}

View File

@ -9,7 +9,13 @@ import (
)
func TestParsePackageJSON(t *testing.T) {
expected := pkg.Package{
tests := []struct {
Fixture string
ExpectedPkg pkg.Package
}{
{
Fixture: "test-fixtures/pkg-json/package.json",
ExpectedPkg: pkg.Package{
Name: "npm",
Version: "6.14.6",
Type: pkg.NpmPkg,
@ -19,9 +25,50 @@ func TestParsePackageJSON(t *testing.T) {
Metadata: pkg.NpmPackageJSONMetadata{
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Licenses: []string{"Artistic-2.0"},
},
},
},
{
Fixture: "test-fixtures/pkg-json/package-nested-author.json",
ExpectedPkg: pkg.Package{
Name: "npm",
Version: "6.14.6",
Type: pkg.NpmPkg,
Licenses: []string{"Artistic-2.0"},
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli",
Licenses: []string{"Artistic-2.0"},
},
},
},
{
Fixture: "test-fixtures/pkg-json/package-repo-string.json",
ExpectedPkg: pkg.Package{
Name: "function-bind",
Version: "1.1.1",
Type: pkg.NpmPkg,
Licenses: []string{"MIT"},
Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{
Author: "Raynos <raynos2@gmail.com>",
Homepage: "https://github.com/Raynos/function-bind",
URL: "git://github.com/Raynos/function-bind.git",
Licenses: []string{"MIT"},
},
},
},
}
fixture, err := os.Open("test-fixtures/pkg-json/package.json")
for _, test := range tests {
t.Run(test.Fixture, func(t *testing.T) {
fixture, err := os.Open(test.Fixture)
if err != nil {
t.Fatalf("failed to open fixture: %+v", err)
}
@ -37,8 +84,10 @@ func TestParsePackageJSON(t *testing.T) {
t.Fatalf("unexpected package count: %d!=1", len(actual))
}
for _, d := range deep.Equal(actual[0], expected) {
for _, d := range deep.Equal(actual[0], test.ExpectedPkg) {
t.Errorf("diff: %+v", d)
}
})
}
}

View File

@ -0,0 +1,16 @@
{
"version": "6.14.6",
"name": "npm",
"description": "a package manager for JavaScript",
"homepage": "https://docs.npmjs.com/",
"author": {
"name": "Isaac Z. Schlueter",
"email": "i@izs.me",
"url": "http://blog.izs.me"
},
"repository": {
"type": "git",
"url": "https://github.com/npm/cli"
},
"license": "Artistic-2.0"
}

View File

@ -0,0 +1,66 @@
{
"name": "function-bind",
"version": "1.1.1",
"description": "Implementation of Function.prototype.bind",
"keywords": [
"function",
"bind",
"shim",
"es5"
],
"author": "Raynos <raynos2@gmail.com>",
"repository": "git://github.com/Raynos/function-bind.git",
"main": "index",
"homepage": "https://github.com/Raynos/function-bind",
"contributors": [
{
"name": "Raynos"
},
{
"name": "Jordan Harband",
"url": "https://github.com/ljharb"
}
],
"bugs": {
"url": "https://github.com/Raynos/function-bind/issues",
"email": "raynos2@gmail.com"
},
"dependencies": {},
"devDependencies": {
"@ljharb/eslint-config": "^12.2.1",
"covert": "^1.1.0",
"eslint": "^4.5.0",
"jscs": "^3.0.7",
"tape": "^4.8.0"
},
"license": "MIT",
"scripts": {
"pretest": "npm run lint",
"test": "npm run tests-only",
"posttest": "npm run coverage -- --quiet",
"tests-only": "node test",
"coverage": "covert test/*.js",
"lint": "npm run jscs && npm run eslint",
"jscs": "jscs *.js */*.js",
"eslint": "eslint *.js */*.js"
},
"testling": {
"files": "test/index.js",
"browsers": [
"ie/8..latest",
"firefox/16..latest",
"firefox/nightly",
"chrome/22..latest",
"chrome/canary",
"opera/12..latest",
"opera/next",
"safari/5.1..latest",
"ipad/6.0..latest",
"iphone/6.0..latest",
"android-browser/4.2..latest"
]
}
,"_resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz"
,"_integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
,"_from": "function-bind@1.1.1"
}

View File

@ -8,6 +8,8 @@ import (
"regexp"
"strings"
"github.com/anchore/syft/internal"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/cataloger/common"
@ -76,7 +78,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
}
for field, pattern := range patterns {
matchMap := matchCaptureGroups(pattern, sanitizedLine)
matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil {
fields[field] = postProcessor(value)
@ -122,15 +124,3 @@ func renderUtf8(s string) string {
})
return fullReplacement
}
// matchCaptureGroups applies regEx to str and returns the values of its named capture groups,
// keyed by group name.
//
// Only the first match in str is considered. Unnamed groups and the whole-match entry are
// left out of the result. A named group that did not participate in the match maps to the
// empty string. When regEx does not match str at all, an empty (non-nil) map is returned.
func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		return results
	}

	for i, name := range regEx.SubexpNames() {
		// index 0 is the whole match and unnamed groups report an empty name; neither is a
		// "named capture group", so they must not leak into the result under the "" key.
		if i == 0 || name == "" || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}
	return results
}

View File

@ -2,11 +2,10 @@ package pkg
// NpmPackageJSONMetadata holds extra information that is used in pkg.Package
// Each field carries both a mapstructure key and a json key.
// NOTE(review): Files appears twice below (with and without omitempty). This looks like the
// removed and added lines of a diff shown together — confirm that only the omitempty
// variant exists in the real file.
type NpmPackageJSONMetadata struct {
Name string `mapstructure:"name" json:"name"`
Version string `mapstructure:"version" json:"version"`
Files []string `mapstructure:"files" json:"files"`
Files []string `mapstructure:"files" json:"files,omitempty"`
Author string `mapstructure:"author" json:"author"`
License string `mapstructure:"license" json:"license"`
Licenses []string `mapstructure:"licenses" json:"licenses"`
Homepage string `mapstructure:"homepage" json:"homepage"`
Description string `mapstructure:"description" json:"description"`
URL string `mapstructure:"url" json:"url"`
}