add dynamic package.json parsing of author field

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-20 15:23:04 -04:00
parent bb14f3b45b
commit 931c796158
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
6 changed files with 151 additions and 41 deletions

15
internal/parse.go Normal file
View File

@ -0,0 +1,15 @@
package internal
import "regexp"
// MatchCaptureGroups applies regEx to str and returns the text captured by each
// named capture group, keyed by group name.
//
// Unnamed groups are skipped, so the result only ever contains named groups.
// When str does not match, an empty (non-nil) map is returned. A named group
// that did not participate in an otherwise successful match maps to "".
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		return results
	}

	// Index 0 is the whole match; the remaining submatches line up one-to-one
	// with SubexpNames(), where unnamed groups carry an empty name.
	for i, name := range regEx.SubexpNames() {
		if i == 0 || name == "" || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}
	return results
}

View File

@ -4,6 +4,11 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"regexp"
"github.com/anchore/syft/internal"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/cataloger/common" "github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -16,7 +21,7 @@ var _ common.ParserFn = parsePackageLock
type PackageJSON struct { type PackageJSON struct {
Version string `json:"version"` Version string `json:"version"`
Latest []string `json:"latest"` Latest []string `json:"latest"`
Author string `json:"author"` Author Author `json:"author"`
License string `json:"license"` License string `json:"license"`
Name string `json:"name"` Name string `json:"name"`
Homepage string `json:"homepage"` Homepage string `json:"homepage"`
@ -24,6 +29,57 @@ type PackageJSON struct {
Dependencies map[string]string `json:"dependencies"` Dependencies map[string]string `json:"dependencies"`
} }
// Author holds the name, email and URL parts of the package.json "author"
// field. The json tags name the keys of the object form of that field; the
// mapstructure tags name the same keys for mapstructure.Decode, which is how
// UnmarshalJSON fills this struct from a decoded map.
type Author struct {
	Name  string `json:"name" mapstructure:"name"`
	Email string `json:"email" mapstructure:"email"`
	URL   string `json:"url" mapstructure:"url"`
}
// authorPattern splits the package.json author shorthand "name <email> (url)"
// into the named groups "name", "email" and "url"; the email and url parts are
// optional. The name group is lazy so that whitespace separating it from the
// next part (or trailing the string) is not captured as part of the name, and
// any amount of whitespace (including none) is accepted before "<" and "(".
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*?)(\s*<(?P<email>.*)>)?(\s*\((?P<url>.*)\))?\s*$`)
// UnmarshalJSON decodes the package.json "author" field, which may be given
// either as a single string of the form "name <email> (url)" or as an object
// with "name", "email" and "url" keys.
// For more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
//
// A JSON null is a no-op and leaves the receiver untouched, following the
// encoding/json convention for Unmarshaler implementations. For any other
// input an error is returned when the value is neither a string nor an object
// of strings, or when no author name could be extracted; the receiver is only
// written on success.
func (a *Author) UnmarshalJSON(b []byte) error {
	if string(b) == "null" {
		return nil
	}

	var parsed Author

	var authorStr string
	if err := json.Unmarshal(b, &authorStr); err == nil {
		// parse out "name <email> (url)" into an Author struct
		fields := internal.MatchCaptureGroups(authorPattern, authorStr)
		parsed = Author{
			Name:  fields["name"],
			Email: fields["email"],
			URL:   fields["url"],
		}
	} else {
		// string parsing did not work, assume a map was given
		var fields map[string]string
		if err := json.Unmarshal(b, &fields); err != nil {
			return fmt.Errorf("unable to parse package.json author: %w", err)
		}

		// translate the map into a structure
		if err := mapstructure.Decode(fields, &parsed); err != nil {
			return fmt.Errorf("unable to decode package.json author: %w", err)
		}
	}

	if parsed.Name == "" {
		return fmt.Errorf("package.json author name is empty")
	}

	*a = parsed
	return nil
}
// String renders the author as "name <email> (url)". The " <email>" and
// " (url)" segments are left out when the corresponding field is empty.
func (a *Author) String() string {
	var emailPart, urlPart string

	if a.Email != "" {
		emailPart = fmt.Sprintf(" <%s>", a.Email)
	}
	if a.URL != "" {
		urlPart = fmt.Sprintf(" (%s)", a.URL)
	}

	return a.Name + emailPart + urlPart
}
// parsePackageJson parses a package.json and returns the discovered JavaScript packages. // parsePackageJson parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) { func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
packages := make([]pkg.Package, 0) packages := make([]pkg.Package, 0)
@ -44,7 +100,7 @@ func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
Type: pkg.NpmPkg, Type: pkg.NpmPkg,
Metadata: pkg.NpmMetadata{ Metadata: pkg.NpmMetadata{
Author: p.Author, Author: p.Author.String(),
Homepage: p.Homepage, Homepage: p.Homepage,
}, },
}) })

View File

@ -9,7 +9,13 @@ import (
) )
func TestParsePackageJSON(t *testing.T) { func TestParsePackageJSON(t *testing.T) {
expected := pkg.Package{ tests := []struct {
Fixture string
ExpectedPkg pkg.Package
}{
{
Fixture: "test-fixtures/pkg-json/package.json",
ExpectedPkg: pkg.Package{
Name: "npm", Name: "npm",
Version: "6.14.6", Version: "6.14.6",
Type: pkg.NpmPkg, Type: pkg.NpmPkg,
@ -19,8 +25,27 @@ func TestParsePackageJSON(t *testing.T) {
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
}, },
},
},
{
Fixture: "test-fixtures/pkg-json/package-nested-author.json",
ExpectedPkg: pkg.Package{
Name: "npm",
Version: "6.14.6",
Type: pkg.NpmPkg,
Licenses: []string{"Artistic-2.0"},
Language: pkg.JavaScript,
Metadata: pkg.NpmMetadata{
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/",
},
},
},
} }
fixture, err := os.Open("test-fixtures/pkg-json/package.json")
for _, test := range tests {
t.Run(test.Fixture, func(t *testing.T) {
fixture, err := os.Open(test.Fixture)
if err != nil { if err != nil {
t.Fatalf("failed to open fixture: %+v", err) t.Fatalf("failed to open fixture: %+v", err)
} }
@ -36,8 +61,9 @@ func TestParsePackageJSON(t *testing.T) {
t.Fatalf("unexpected package count: %d!=1", len(actual)) t.Fatalf("unexpected package count: %d!=1", len(actual))
} }
for _, d := range deep.Equal(actual[0], expected) { for _, d := range deep.Equal(actual[0], test.ExpectedPkg) {
t.Errorf("diff: %+v", d) t.Errorf("diff: %+v", d)
} }
})
}
} }

View File

@ -0,0 +1,12 @@
{
"version": "6.14.6",
"name": "npm",
"description": "a package manager for JavaScript",
"homepage": "https://docs.npmjs.com/",
"author": {
"name": "Isaac Z. Schlueter",
"email": "i@izs.me",
"url": "http://blog.izs.me"
},
"license": "Artistic-2.0"
}

View File

@ -8,6 +8,8 @@ import (
"regexp" "regexp"
"strings" "strings"
"github.com/anchore/syft/internal"
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/cataloger/common" "github.com/anchore/syft/syft/cataloger/common"
@ -76,7 +78,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
} }
for field, pattern := range patterns { for field, pattern := range patterns {
matchMap := matchCaptureGroups(pattern, sanitizedLine) matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" { if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil { if postProcessor := postProcessors[field]; postProcessor != nil {
fields[field] = postProcessor(value) fields[field] = postProcessor(value)
@ -121,15 +123,3 @@ func renderUtf8(s string) string {
}) })
return fullReplacement return fullReplacement
} }
// matchCaptureGroups applies regEx to str and returns the text captured by each
// named capture group, keyed by group name.
//
// Unnamed groups are skipped, so the result only ever contains named groups.
// When str does not match, an empty (non-nil) map is returned. A named group
// that did not participate in an otherwise successful match maps to "".
func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		return results
	}

	// Index 0 is the whole match; the remaining submatches line up one-to-one
	// with SubexpNames(), where unnamed groups carry an empty name.
	for i, name := range regEx.SubexpNames() {
		if i == 0 || name == "" || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}
	return results
}

11
syft/pkg/metadata.go Normal file
View File

@ -0,0 +1,11 @@
package pkg
// MetadataType is a string identifier for a kind of package metadata.
// NOTE(review): presumably this labels the concrete type stored in a package's
// Metadata field — confirm against the callers.
type MetadataType string

const (
	UnknownMetadataType MetadataType = "UnknownMetadata"
	ApkMetadataType     MetadataType = "apk-metadata"
	DpkgMetadataType    MetadataType = "dpkg-metadata"
	GemMetadataType     MetadataType = "gem-metadata"
	RpmdbMetadataType   MetadataType = "rpmdb-metadata"

	// GemgMetadataType is the original, misspelled name of GemMetadataType and
	// is kept so existing references keep compiling.
	//
	// Deprecated: use GemMetadataType instead.
	GemgMetadataType = GemMetadataType
)