Extract language and package type from pURLs on SBOM decode (#777)

* add language detection from pURLs

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add package type detection from pURLs

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add cargo and npm pURL support

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix npm tests and linting

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2022-01-27 09:35:16 -05:00 committed by GitHub
parent 9f7104d4f1
commit d7a23e4bb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 287 additions and 21 deletions

View File

@ -1,5 +1,12 @@
package pkg package pkg
import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)
var _ urlIdentifier = (*CargoPackageMetadata)(nil)
type CargoPackageMetadata struct { type CargoPackageMetadata struct {
Name string `toml:"name" json:"name"` Name string `toml:"name" json:"name"`
Version string `toml:"version" json:"version"` Version string `toml:"version" json:"version"`
@ -19,3 +26,15 @@ func (p CargoPackageMetadata) Pkg() *Package {
Metadata: p, Metadata: p,
} }
} }
// PackageURL builds the package URL (pURL) string for this Rust crate. The pURL
// uses the "cargo" type with the crate's Name and Version; the namespace and
// subpath are empty strings and no qualifiers are supplied. The linux release
// argument is accepted but not used.
// See https://github.com/package-url/purl-spec for the pURL format.
func (p CargoPackageMetadata) PackageURL(_ *linux.Release) string {
	purl := packageurl.NewPackageURL("cargo", "", p.Name, p.Version, nil, "")
	return purl.ToString()
}

View File

@ -21,27 +21,27 @@ import (
// integrity check // integrity check
var _ common.ParserFn = parsePackageJSON var _ common.ParserFn = parsePackageJSON
// PackageJSON represents a JavaScript package.json file // packageJSON represents a JavaScript package.json file
type PackageJSON struct { type packageJSON struct {
Version string `json:"version"` Version string `json:"version"`
Latest []string `json:"latest"` Latest []string `json:"latest"`
Author Author `json:"author"` Author author `json:"author"`
License json.RawMessage `json:"license"` License json.RawMessage `json:"license"`
Licenses []license `json:"licenses"` Licenses []license `json:"licenses"`
Name string `json:"name"` Name string `json:"name"`
Homepage string `json:"homepage"` Homepage string `json:"homepage"`
Description string `json:"description"` Description string `json:"description"`
Dependencies map[string]string `json:"dependencies"` Dependencies map[string]string `json:"dependencies"`
Repository Repository `json:"repository"` Repository repository `json:"repository"`
} }
type Author struct { type author struct {
Name string `json:"name" mapstruct:"name"` Name string `json:"name" mapstruct:"name"`
Email string `json:"email" mapstruct:"email"` Email string `json:"email" mapstruct:"email"`
URL string `json:"url" mapstruct:"url"` URL string `json:"url" mapstruct:"url"`
} }
type Repository struct { type repository struct {
Type string `json:"type" mapstructure:"type"` Type string `json:"type" mapstructure:"type"`
URL string `json:"url" mapstructure:"url"` URL string `json:"url" mapstructure:"url"`
} }
@ -50,10 +50,10 @@ type Repository struct {
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me" // ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`) var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
func (a *Author) UnmarshalJSON(b []byte) error { func (a *author) UnmarshalJSON(b []byte) error {
var authorStr string var authorStr string
var fields map[string]string var fields map[string]string
var author Author var auth author
if err := json.Unmarshal(b, &authorStr); err != nil { if err := json.Unmarshal(b, &authorStr); err != nil {
// string parsing did not work, assume a map was given // string parsing did not work, assume a map was given
@ -62,21 +62,21 @@ func (a *Author) UnmarshalJSON(b []byte) error {
return fmt.Errorf("unable to parse package.json author: %w", err) return fmt.Errorf("unable to parse package.json author: %w", err)
} }
} else { } else {
// parse out "name <email> (url)" into an Author struct // parse out "name <email> (url)" into an author struct
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr) fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
} }
// translate the map into a structure // translate the map into a structure
if err := mapstructure.Decode(fields, &author); err != nil { if err := mapstructure.Decode(fields, &auth); err != nil {
return fmt.Errorf("unable to decode package.json author: %w", err) return fmt.Errorf("unable to decode package.json author: %w", err)
} }
*a = author *a = auth
return nil return nil
} }
func (a *Author) AuthorString() string { func (a *author) AuthorString() string {
result := a.Name result := a.Name
if a.Email != "" { if a.Email != "" {
result += fmt.Sprintf(" <%s>", a.Email) result += fmt.Sprintf(" <%s>", a.Email)
@ -87,10 +87,10 @@ func (a *Author) AuthorString() string {
return result return result
} }
func (r *Repository) UnmarshalJSON(b []byte) error { func (r *repository) UnmarshalJSON(b []byte) error {
var repositoryStr string var repositoryStr string
var fields map[string]string var fields map[string]string
var repository Repository var repo repository
if err := json.Unmarshal(b, &repositoryStr); err != nil { if err := json.Unmarshal(b, &repositoryStr); err != nil {
// string parsing did not work, assume a map was given // string parsing did not work, assume a map was given
@ -99,11 +99,11 @@ func (r *Repository) UnmarshalJSON(b []byte) error {
return fmt.Errorf("unable to parse package.json author: %w", err) return fmt.Errorf("unable to parse package.json author: %w", err)
} }
// translate the map into a structure // translate the map into a structure
if err := mapstructure.Decode(fields, &repository); err != nil { if err := mapstructure.Decode(fields, &repo); err != nil {
return fmt.Errorf("unable to decode package.json author: %w", err) return fmt.Errorf("unable to decode package.json author: %w", err)
} }
*r = repository *r = repo
} else { } else {
r.URL = repositoryStr r.URL = repositoryStr
} }
@ -134,7 +134,7 @@ func licenseFromJSON(b []byte) (string, error) {
return "", errors.New("unable to unmarshal license field as either string or object") return "", errors.New("unable to unmarshal license field as either string or object")
} }
func (p PackageJSON) licensesFromJSON() ([]string, error) { func (p packageJSON) licensesFromJSON() ([]string, error) {
if p.License == nil && p.Licenses == nil { if p.License == nil && p.Licenses == nil {
// This package.json doesn't specify any licenses whatsoever // This package.json doesn't specify any licenses whatsoever
return []string{}, nil return []string{}, nil
@ -167,7 +167,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
for { for {
var p PackageJSON var p packageJSON
if err := dec.Decode(&p); err == io.EOF { if err := dec.Decode(&p); err == io.EOF {
break break
} else if err != nil { } else if err != nil {
@ -185,7 +185,7 @@ func parsePackageJSON(path string, reader io.Reader) ([]*pkg.Package, []artifact
return packages, nil, nil return packages, nil, nil
} }
func newPackageJSONPackage(p PackageJSON) *pkg.Package { func newPackageJSONPackage(p packageJSON) *pkg.Package {
licenses, err := p.licensesFromJSON() licenses, err := p.licensesFromJSON()
if err != nil { if err != nil {
log.Warnf("unable to extract licenses from javascript package.json: %+v", err) log.Warnf("unable to extract licenses from javascript package.json: %+v", err)
@ -199,6 +199,8 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package {
Type: pkg.NpmPkg, Type: pkg.NpmPkg,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: p.Name,
Version: p.Version,
Author: p.Author.AuthorString(), Author: p.Author.AuthorString(),
Homepage: p.Homepage, Homepage: p.Homepage,
URL: p.Repository.URL, URL: p.Repository.URL,
@ -207,7 +209,7 @@ func newPackageJSONPackage(p PackageJSON) *pkg.Package {
} }
} }
func (p PackageJSON) hasNameAndVersionValues() bool { func (p packageJSON) hasNameAndVersionValues() bool {
return p.Name != "" && p.Version != "" return p.Name != "" && p.Version != ""
} }

View File

@ -24,6 +24,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli", URL: "https://github.com/npm/cli",
@ -41,6 +43,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli", URL: "https://github.com/npm/cli",
@ -58,6 +62,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli", URL: "https://github.com/npm/cli",
@ -75,6 +81,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli", URL: "https://github.com/npm/cli",
@ -92,6 +100,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "npm",
Version: "6.14.6",
Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)", Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
Homepage: "https://docs.npmjs.com/", Homepage: "https://docs.npmjs.com/",
URL: "https://github.com/npm/cli", URL: "https://github.com/npm/cli",
@ -109,6 +119,8 @@ func TestParsePackageJSON(t *testing.T) {
Language: pkg.JavaScript, Language: pkg.JavaScript,
MetadataType: pkg.NpmPackageJSONMetadataType, MetadataType: pkg.NpmPackageJSONMetadataType,
Metadata: pkg.NpmPackageJSONMetadata{ Metadata: pkg.NpmPackageJSONMetadata{
Name: "function-bind",
Version: "1.1.1",
Author: "Raynos <raynos2@gmail.com>", Author: "Raynos <raynos2@gmail.com>",
Homepage: "https://github.com/Raynos/function-bind", Homepage: "https://github.com/Raynos/function-bind",
URL: "git://github.com/Raynos/function-bind.git", URL: "git://github.com/Raynos/function-bind.git",

View File

@ -72,7 +72,7 @@ type JavaManifest struct {
NamedSections map[string]map[string]string `json:"namedSections,omitempty"` NamedSections map[string]map[string]string `json:"namedSections,omitempty"`
} }
// PackageURL returns the PURL for the specific Alpine package (see https://github.com/package-url/purl-spec) // PackageURL returns the PURL for the specific Maven package (see https://github.com/package-url/purl-spec)
func (m JavaMetadata) PackageURL(_ *linux.Release) string { func (m JavaMetadata) PackageURL(_ *linux.Release) string {
if m.PomProperties != nil { if m.PomProperties != nil {
pURL := packageurl.NewPackageURL( pURL := packageurl.NewPackageURL(

View File

@ -1,5 +1,7 @@
package pkg package pkg
import "github.com/anchore/packageurl-go"
// Language represents a single programming language. // Language represents a single programming language.
type Language string type Language string
@ -30,3 +32,29 @@ var AllLanguages = []Language{
func (l Language) String() string { func (l Language) String() string {
return string(l) return string(l)
} }
// LanguageFromPURL infers the programming language of a package from the type
// component of the given package URL string. UnknownLanguage is returned when
// the string cannot be parsed as a pURL or when its type is not one of the
// types handled below.
func LanguageFromPURL(p string) Language {
	parsed, err := packageurl.FromString(p)
	if err != nil {
		return UnknownLanguage
	}

	// start from "unknown" and only override it for the pURL types we recognize
	lang := UnknownLanguage
	switch parsed.Type {
	case packageurl.TypeMaven, purlGradlePkgType:
		lang = Java
	case packageurl.TypeComposer:
		lang = PHP
	case packageurl.TypeGolang:
		lang = Go
	case packageurl.TypeNPM:
		lang = JavaScript
	case packageurl.TypePyPi:
		lang = Python
	case packageurl.TypeGem:
		lang = Ruby
	case purlCargoPkgType:
		lang = Rust
	}
	return lang
}

66
syft/pkg/language_test.go Normal file
View File

@ -0,0 +1,66 @@
package pkg
import (
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
"testing"
)
// TestLanguageFromPURL checks that each sample pURL maps to the expected
// Language, and that the non-empty results collected across all samples match
// the entries of AllLanguages.
func TestLanguageFromPURL(t *testing.T) {
	cases := []struct {
		purl string
		want Language
	}{
		{purl: "pkg:npm/util@2.32", want: JavaScript},
		{purl: "pkg:pypi/util-linux@2.32.1-27.el8", want: Python},
		{purl: "pkg:gem/ruby-advisory-db-check@0.12.4", want: Ruby},
		{purl: "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c", want: Go},
		{purl: "pkg:cargo/clap@2.33.0", want: Rust},
		{purl: "pkg:composer/laravel/laravel@5.5.0", want: PHP},
		{purl: "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?type=zip&classifier=dist", want: Java},
	}

	// the set of languages the cases above are expected to produce collectively
	expected := strset.New()
	for _, lang := range AllLanguages {
		expected.Add(string(lang))
	}

	// languages actually returned by LanguageFromPURL, one entry per case
	var observed []string
	for _, tc := range cases {
		t.Run(tc.purl, func(t *testing.T) {
			got := LanguageFromPURL(tc.purl)
			if got != "" {
				observed = append(observed, string(got))
			}
			assert.Equalf(t, tc.want, got, "LanguageFromPURL(%v)", tc.purl)
		})
	}

	assert.ElementsMatch(t, expected.List(), observed, "missing one or more languages to test against (maybe a package type was added?)")
}

View File

@ -1,7 +1,16 @@
package pkg package pkg
import (
"github.com/anchore/packageurl-go"
"github.com/anchore/syft/syft/linux"
)
var _ urlIdentifier = (*NpmPackageJSONMetadata)(nil)
// NpmPackageJSONMetadata holds extra information that is used in pkg.Package // NpmPackageJSONMetadata holds extra information that is used in pkg.Package
type NpmPackageJSONMetadata struct { type NpmPackageJSONMetadata struct {
Name string `mapstructure:"name" json:"name"`
Version string `mapstructure:"version" json:"version"`
Files []string `mapstructure:"files" json:"files,omitempty"` Files []string `mapstructure:"files" json:"files,omitempty"`
Author string `mapstructure:"author" json:"author"` Author string `mapstructure:"author" json:"author"`
Licenses []string `mapstructure:"licenses" json:"licenses"` Licenses []string `mapstructure:"licenses" json:"licenses"`
@ -9,3 +18,15 @@ type NpmPackageJSONMetadata struct {
Description string `mapstructure:"description" json:"description"` Description string `mapstructure:"description" json:"description"`
URL string `mapstructure:"url" json:"url"` URL string `mapstructure:"url" json:"url"`
} }
// PackageURL builds the package URL (pURL) string for this NPM package. The
// pURL uses the packageurl.TypeNPM type with the package's Name and Version;
// the namespace and subpath are empty strings and no qualifiers are supplied.
// The linux release argument is accepted but not used.
// See https://github.com/package-url/purl-spec for the pURL format.
func (p NpmPackageJSONMetadata) PackageURL(_ *linux.Release) string {
	purl := packageurl.NewPackageURL(packageurl.TypeNPM, "", p.Name, p.Version, nil, "")
	return purl.ToString()
}

View File

@ -66,3 +66,35 @@ func (t Type) PackageURLType() string {
return "" return ""
} }
} }
// TypeFromPURL infers the syft package Type from the type component of the
// given package URL string. UnknownPkg is returned when the string cannot be
// parsed as a pURL or when its type is not one of the types handled below.
func TypeFromPURL(p string) Type {
	purl, err := packageurl.FromString(p)
	if err != nil {
		return UnknownPkg
	}

	switch purl.Type {
	case packageurl.TypeDebian, "deb":
		return DebPkg
	case packageurl.TypeRPM:
		return RpmPkg
	case "alpine":
		return ApkPkg
	case packageurl.TypeMaven, "gradle":
		// "gradle" is accepted alongside maven so that this function agrees with
		// LanguageFromPURL, which reports Java for both pURL types; without it a
		// gradle pURL would decode to a Java language but an unknown package type.
		return JavaPkg
	case packageurl.TypeComposer:
		return PhpComposerPkg
	case packageurl.TypeGolang:
		return GoModulePkg
	case packageurl.TypeNPM:
		return NpmPkg
	case packageurl.TypePyPi:
		return PythonPkg
	case packageurl.TypeGem:
		return GemPkg
	case "cargo", "crate":
		return RustPkg
	default:
		return UnknownPkg
	}
}

83
syft/pkg/type_test.go Normal file
View File

@ -0,0 +1,83 @@
package pkg
import (
"github.com/scylladb/go-set/strset"
"testing"
"github.com/stretchr/testify/assert"
)
// TestTypeFromPURL checks that each sample pURL maps to the expected package
// Type, and that the non-empty results collected across all samples match the
// entries of AllPkgs (minus the types that are excluded below).
func TestTypeFromPURL(t *testing.T) {
	cases := []struct {
		name     string
		purl     string
		expected Type
	}{
		{purl: "pkg:rpm/fedora/util-linux@2.32.1-27.el8-?arch=amd64", expected: RpmPkg},
		{purl: "pkg:alpine/util-linux@2.32.1", expected: ApkPkg},
		{purl: "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", expected: DebPkg},
		{purl: "pkg:npm/util@2.32", expected: NpmPkg},
		{purl: "pkg:pypi/util-linux@2.32.1-27.el8", expected: PythonPkg},
		{purl: "pkg:gem/ruby-advisory-db-check@0.12.4", expected: GemPkg},
		{purl: "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c", expected: GoModulePkg},
		{purl: "pkg:cargo/clap@2.33.0", expected: RustPkg},
		{purl: "pkg:composer/laravel/laravel@5.5.0", expected: PhpComposerPkg},
		{purl: "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?type=zip&classifier=dist", expected: JavaPkg},
	}

	// the set of package types the cases above are expected to produce collectively
	wantTypes := strset.New()
	for _, pkgType := range AllPkgs {
		wantTypes.Add(string(pkgType))
	}

	// testing microsoft packages and jenkins-plugins is not valid for purl at this time
	wantTypes.Remove(string(KbPkg))
	wantTypes.Remove(string(JenkinsPluginPkg))

	// package types actually returned by TypeFromPURL, one entry per case
	var gotTypes []string
	for _, tc := range cases {
		t.Run(string(tc.expected), func(t *testing.T) {
			got := TypeFromPURL(tc.purl)
			if got != "" {
				gotTypes = append(gotTypes, string(got))
			}
			assert.Equal(t, tc.expected, got)
		})
	}

	assert.ElementsMatch(t, wantTypes.List(), gotTypes, "missing one or more package types to test against (maybe a package type was added?)")
}

View File

@ -18,6 +18,9 @@ const (
// this qualifier is not in the pURL spec, but is used by grype to perform indirect matching based on source information // this qualifier is not in the pURL spec, but is used by grype to perform indirect matching based on source information
purlUpstreamQualifier = "upstream" purlUpstreamQualifier = "upstream"
purlCargoPkgType = "cargo"
purlGradlePkgType = "gradle"
) )
type urlIdentifier interface { type urlIdentifier interface {