From 931c7961586bae00785168dbe13310cda5793ce7 Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Tue, 20 Oct 2020 15:23:04 -0400
Subject: [PATCH] add dynamic package.json parsing of author field

Signed-off-by: Alex Goodman
---
 internal/parse.go                             | 17 ++++
 .../javascript/parse_package_json.go          | 71 ++++++++++++++-
 .../javascript/parse_package_json_test.go     | 79 ++++++++++++-------
 .../pkg-json/package-nested-author.json       | 12 +++
 syft/cataloger/ruby/parse_gemspec.go          | 16 +---
 syft/pkg/metadata.go                          | 13 +++
 6 files changed, 167 insertions(+), 41 deletions(-)
 create mode 100644 internal/parse.go
 create mode 100644 syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json
 create mode 100644 syft/pkg/metadata.go

diff --git a/internal/parse.go b/internal/parse.go
new file mode 100644
index 000000000..300825c98
--- /dev/null
+++ b/internal/parse.go
@@ -0,0 +1,17 @@
+package internal
+
+import "regexp"
+
+// MatchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
+// Named groups that did not take part in the match map to ""; if str does not match at all the map is empty.
+func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
+	match := regEx.FindStringSubmatch(str)
+	results := make(map[string]string)
+	for i, name := range regEx.SubexpNames() {
+		// index 0 is the whole match and unnamed groups have an empty name: neither is a named group
+		if i > 0 && i < len(match) && name != "" {
+			results[name] = match[i]
+		}
+	}
+	return results
+}
diff --git a/syft/cataloger/javascript/parse_package_json.go b/syft/cataloger/javascript/parse_package_json.go
index 83642e451..0b3a39a11 100644
--- a/syft/cataloger/javascript/parse_package_json.go
+++ b/syft/cataloger/javascript/parse_package_json.go
@@ -4,6 +4,11 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"regexp"
+
+	"github.com/anchore/syft/internal"
+
+	"github.com/mitchellh/mapstructure"
 
 	"github.com/anchore/syft/syft/cataloger/common"
 	"github.com/anchore/syft/syft/pkg"
@@ -16,7 +21,7 @@ var _ common.ParserFn = parsePackageLock
 type PackageJSON struct {
 	Version      string            `json:"version"`
 	Latest       []string          `json:"latest"`
-	Author       string            `json:"author"`
+	Author       Author            `json:"author"`
 	License      string            `json:"license"`
 	Name         string            `json:"name"`
 	Homepage     string            `json:"homepage"`
@@ -24,6 +29,68 @@ type PackageJSON struct {
 	Dependencies map[string]string `json:"dependencies"`
 }
 
+// Author represents a single person from a package.json people field such as "author".
+type Author struct {
+	Name  string `json:"name" mapstructure:"name"`
+	Email string `json:"email" mapstructure:"email"`
+	URL   string `json:"url" mapstructure:"url"`
+}
+
+// authorPattern matches the "name <email> (url)" shorthand, where the email and url parts are optional.
+var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
+
+// UnmarshalJSON accepts both forms of a package.json author: a "name <email> (url)" string or an
+// object with "name", "email", and "url" keys. A JSON null leaves the Author untouched; any other
+// value that does not yield an author name is an error.
+func (a *Author) UnmarshalJSON(b []byte) error {
+	// by convention an Unmarshaler treats JSON null as a no-op instead of failing the whole document
+	if string(b) == "null" {
+		return nil
+	}
+
+	var authorStr string
+	if err := json.Unmarshal(b, &authorStr); err != nil {
+		// string parsing did not work, assume a map was given
+		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
+		var fields map[string]string
+		var author Author
+		if err := json.Unmarshal(b, &fields); err != nil {
+			return fmt.Errorf("unable to parse package.json author: %w", err)
+		}
+		// translate the map into a structure
+		if err := mapstructure.Decode(fields, &author); err != nil {
+			return fmt.Errorf("unable to decode package.json author: %w", err)
+		}
+		*a = author
+	} else {
+		// parse out "name <email> (url)" into an Author struct
+		var fields = internal.MatchCaptureGroups(authorPattern, authorStr)
+		*a = Author{
+			Name:  fields["name"],
+			Email: fields["email"],
+			URL:   fields["url"],
+		}
+	}
+
+	if a.Name == "" {
+		return fmt.Errorf("package.json author name is empty")
+	}
+
+	return nil
+}
+
+// String renders the author in the package.json "name <email> (url)" shorthand, omitting empty parts.
+func (a *Author) String() string {
+	result := a.Name
+	if a.Email != "" {
+		result += fmt.Sprintf(" <%s>", a.Email)
+	}
+	if a.URL != "" {
+		result += fmt.Sprintf(" (%s)", a.URL)
+	}
+	return result
+}
+
 // parsePackageJson parses a package.json and returns the discovered JavaScript packages.
 func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
 	packages := make([]pkg.Package, 0)
@@ -44,7 +111,7 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
 			Language: pkg.JavaScript,
 			Type:     pkg.NpmPkg,
 			Metadata: pkg.NpmMetadata{
-				Author:   p.Author,
+				Author:   p.Author.String(),
 				Homepage: p.Homepage,
 			},
 		})
diff --git a/syft/cataloger/javascript/parse_package_json_test.go b/syft/cataloger/javascript/parse_package_json_test.go
index f8e7b92fb..b843f199d 100644
--- a/syft/cataloger/javascript/parse_package_json_test.go
+++ b/syft/cataloger/javascript/parse_package_json_test.go
@@ -9,35 +9,62 @@ import (
 )
 
 func TestParsePackageJSON(t *testing.T) {
-	expected := pkg.Package{
-		Name:     "npm",
-		Version:  "6.14.6",
-		Type:     pkg.NpmPkg,
-		Licenses: []string{"Artistic-2.0"},
-		Language: pkg.JavaScript,
-		Metadata: pkg.NpmMetadata{
-			Author:   "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
-			Homepage: "https://docs.npmjs.com/",
+	tests := []struct {
+		Fixture     string
+		ExpectedPkg pkg.Package
+	}{
+		{
+			Fixture: "test-fixtures/pkg-json/package.json",
+			ExpectedPkg: pkg.Package{
+				Name:     "npm",
+				Version:  "6.14.6",
+				Type:     pkg.NpmPkg,
+				Licenses: []string{"Artistic-2.0"},
+				Language: pkg.JavaScript,
+				Metadata: pkg.NpmMetadata{
+					Author:   "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
+					Homepage: "https://docs.npmjs.com/",
+				},
+			},
+		},
+		{
+			Fixture: "test-fixtures/pkg-json/package-nested-author.json",
+			ExpectedPkg: pkg.Package{
+				Name:     "npm",
+				Version:  "6.14.6",
+				Type:     pkg.NpmPkg,
+				Licenses: []string{"Artistic-2.0"},
+				Language: pkg.JavaScript,
+				Metadata: pkg.NpmMetadata{
+					Author:   "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
+					Homepage: "https://docs.npmjs.com/",
+				},
+			},
 		},
 	}
-	fixture, err := os.Open("test-fixtures/pkg-json/package.json")
-	if err != nil {
-		t.Fatalf("failed to open fixture: %+v", err)
-	}
 
-	actual, err := parsePackageJSON(fixture.Name(), fixture)
-	if err != nil {
-		t.Fatalf("failed to parse package-lock.json: %+v", err)
-	}
-	if len(actual) != 1 {
-		for _, a := range actual {
-			t.Log(" ", a)
-		}
-		t.Fatalf("unexpected package count: %d!=1", len(actual))
-	}
+	for _, test := range tests {
+		t.Run(test.Fixture, func(t *testing.T) {
+			fixture, err := os.Open(test.Fixture)
+			if err != nil {
+				t.Fatalf("failed to open fixture: %+v", err)
+			}
+			defer fixture.Close()
 
-	for _, d := range deep.Equal(actual[0], expected) {
-		t.Errorf("diff: %+v", d)
-	}
+			actual, err := parsePackageJSON(fixture.Name(), fixture)
+			if err != nil {
+				t.Fatalf("failed to parse package.json: %+v", err)
+			}
+			if len(actual) != 1 {
+				for _, a := range actual {
+					t.Log(" ", a)
+				}
+				t.Fatalf("unexpected package count: %d!=1", len(actual))
+			}
 
+			for _, d := range deep.Equal(actual[0], test.ExpectedPkg) {
+				t.Errorf("diff: %+v", d)
+			}
+		})
+	}
 }
diff --git a/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json b/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json
new file mode 100644
index 000000000..436fca091
--- /dev/null
+++ b/syft/cataloger/javascript/test-fixtures/pkg-json/package-nested-author.json
@@ -0,0 +1,12 @@
+{
+  "version": "6.14.6",
+  "name": "npm",
+  "description": "a package manager for JavaScript",
+  "homepage": "https://docs.npmjs.com/",
+  "author": {
+    "name": "Isaac Z. Schlueter",
+    "email": "i@izs.me",
+    "url": "http://blog.izs.me"
+  },
+  "license": "Artistic-2.0"
+}
\ No newline at end of file
diff --git a/syft/cataloger/ruby/parse_gemspec.go b/syft/cataloger/ruby/parse_gemspec.go
index 7fe310265..3957c5b5c 100644
--- a/syft/cataloger/ruby/parse_gemspec.go
+++ b/syft/cataloger/ruby/parse_gemspec.go
@@ -8,6 +8,8 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/anchore/syft/internal"
+
 	"github.com/mitchellh/mapstructure"
 
 	"github.com/anchore/syft/syft/cataloger/common"
@@ -76,7 +78,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
 		}
 
 		for field, pattern := range patterns {
-			matchMap := matchCaptureGroups(pattern, sanitizedLine)
+			matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
 			if value := matchMap[field]; value != "" {
 				if postProcessor := postProcessors[field]; postProcessor != nil {
 					fields[field] = postProcessor(value)
@@ -121,15 +123,3 @@ func renderUtf8(s string) string {
 	})
 	return fullReplacement
 }
-
-// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
-func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
-	match := regEx.FindStringSubmatch(str)
-	results := make(map[string]string)
-	for i, name := range regEx.SubexpNames() {
-		if i > 0 && i <= len(match) {
-			results[name] = match[i]
-		}
-	}
-	return results
-}
diff --git a/syft/pkg/metadata.go b/syft/pkg/metadata.go
new file mode 100644
index 000000000..2d5eb2def
--- /dev/null
+++ b/syft/pkg/metadata.go
@@ -0,0 +1,13 @@
+package pkg
+
+// MetadataType is a string identifier for a kind of package metadata.
+type MetadataType string
+
+const (
+	UnknownMetadataType MetadataType = "UnknownMetadata"
+	ApkMetadataType     MetadataType = "apk-metadata"
+	DpkgMetadataType    MetadataType = "dpkg-metadata"
+	// NOTE(review): "Gemg" looks like a typo for "Gem" -- confirm and rename before callers depend on it
+	GemgMetadataType    MetadataType = "gem-metadata"
+	RpmdbMetadataType   MetadataType = "rpmdb-metadata"
+)