Fill in SPDX originator for all supported package types (#2822)

* add failing test + beef up doc comments

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* cover more metadata types in spdx originator processing

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2024-04-29 16:33:00 -04:00 committed by GitHub
parent 9901ea8fe9
commit 5b03788300
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 698 additions and 169 deletions

View File

@ -517,9 +517,7 @@ func toPackageOriginator(p pkg.Package) *spdx.Originator {
} }
func toPackageSupplier(p pkg.Package) *spdx.Supplier { func toPackageSupplier(p pkg.Package) *spdx.Supplier {
// this uses the Originator function for now until kind, supplier := helpers.Supplier(p)
// a better distinction can be made for supplier
kind, supplier := helpers.Originator(p)
if kind == "" || supplier == "" { if kind == "" || supplier == "" {
return &spdx.Supplier{ return &spdx.Supplier{
Supplier: helpers.NOASSERTION, Supplier: helpers.NOASSERTION,

View File

@ -0,0 +1,210 @@
package helpers
import (
"fmt"
"regexp"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/pkg"
)
const (
// orgType is the entity kind returned when the originator/supplier value names an organization
// (rendered as "Organization: <org>" in SPDX output).
orgType = "Organization"
// personType is the entity kind returned when the originator/supplier value names an individual
// (rendered as "Person: <person>" in SPDX output).
personType = "Person"
)
// Originator derives the SPDX "Package Originator" of a package from its metadata. The result needs to
// conform to the SPDX spec here:
// https://spdx.github.io/spdx-spec/v2.2.2/package-information/#76-package-originator-field
//
// Definition (quoted from the spec):
//
//	If the package identified in the SPDX document originated from a different person or
//	organization than identified as Package Supplier (see 7.5 above), this field identifies from
//	where or whom the package originally came. In some cases, a package may be created and
//	originally distributed by a different third party than the Package Supplier of the package.
//	For example, the SPDX document identifies the package as glibc and the Package Supplier as
//	Red Hat, but the Free Software Foundation is the Package Originator.
//
// Use NOASSERTION if:
//
//   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
//   - the SPDX document creator has made no attempt to determine this field; or
//   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
//
// Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
//
// The return values are <type>, <value>. Both are empty when hasMetadata reports false for the package. The
// type is "Organization" for metadata kinds that record a vendor/company, and otherwise defaults to "Person"
// whenever a non-empty author was found. The value is always passed through parseAndFormatPersonOrOrg.
func Originator(p pkg.Package) (typ string, author string) { // nolint: funlen
	if !hasMetadata(p) {
		return "", ""
	}

	switch m := p.Metadata.(type) {
	// --- kinds that describe an organization ---
	case pkg.DotnetPortableExecutableEntry:
		typ, author = orgType, m.CompanyName
	case pkg.RpmArchive:
		typ, author = orgType, m.Vendor
	case pkg.RpmDBEntry:
		typ, author = orgType, m.Vendor
	case pkg.WordpressPluginEntry:
		// plugin authors are overwhelmingly organizations rather than individuals
		typ, author = orgType, m.Author

	// --- kinds with a single author/maintainer field ---
	case pkg.ApkDBEntry:
		author = m.Maintainer
	case pkg.DpkgDBEntry:
		author = m.Maintainer
	case pkg.LinuxKernelModule:
		author = m.Author
	case pkg.NpmPackage:
		author = m.Author

	// --- kinds that need a little selection logic ---
	case pkg.JavaArchive:
		// only the main manifest section is consulted; the specification vendor wins over the implementation vendor
		if manifest := m.Manifest; manifest != nil {
			author = manifest.Main.MustGet("Specification-Vendor")
			if author == "" {
				author = manifest.Main.MustGet("Implementation-Vendor")
			}
		}
	case pkg.PhpComposerInstalledEntry:
		// only the first listed author is used
		if authors := m.Authors; len(authors) > 0 {
			author = formatPersonOrOrg(authors[0].Name, authors[0].Email)
		}
	case pkg.PhpComposerLockEntry:
		// only the first listed author is used
		if authors := m.Authors; len(authors) > 0 {
			author = formatPersonOrOrg(authors[0].Name, authors[0].Email)
		}
	case pkg.PythonPackage:
		author = formatPersonOrOrg(m.Author, m.AuthorEmail)
	case pkg.RDescription:
		// the maintainer field is the one most likely to carry both a name and an email
		if author = m.Maintainer; author == "" {
			author = m.Author
		}
	case pkg.RubyGemspec:
		// only the first listed author is used
		if len(m.Authors) > 0 {
			author = m.Authors[0]
		}
	}

	// anything that was found but not explicitly flagged as an organization is treated as a person
	if author != "" && typ == "" {
		typ = personType
	}

	return typ, parseAndFormatPersonOrOrg(author)
}
// Supplier derives the SPDX "Package Supplier" of a package from its metadata. The result needs to conform to
// the SPDX spec here:
// https://spdx.github.io/spdx-spec/v2.2.2/package-information/#75-package-supplier-field
//
// Definition (quoted from the spec):
//
//	Identify the actual distribution source for the package/directory identified in the SPDX document. This might
//	or might not be different from the originating distribution source for the package. The name of the Package Supplier
//	shall be an organization or recognized author and not a web site. For example, SourceForge is a host website, not a
//	supplier, the supplier for https://sourceforge.net/projects/bridge/ is “The Linux Foundation.”
//
// Use NOASSERTION if:
//
//   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
//   - the SPDX document creator has made no attempt to determine this field; or
//   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
//
// Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
//
// The return values are <type>, <value>. Both are empty when hasMetadata reports false for the package. Only
// ALPM metadata has a dedicated supplier field (the packager); every other case defers to Originator.
func Supplier(p pkg.Package) (typ string, author string) {
	if !hasMetadata(p) {
		return "", ""
	}

	var packager string
	if alpm, isAlpm := p.Metadata.(pkg.AlpmDBEntry); isAlpm {
		// The packager is, by most indications, whoever packaged the upstream software, which is usually not the
		// original author of that software (that would be the originator). A single user can be both packager and
		// author, but this code cannot distinguish that case, so it sticks to the semantically correct reading of
		// "packager" (which says nothing about the authorship of the upstream software).
		packager = alpm.Packager
	}

	if packager == "" {
		// TODO: this uses the Originator function for now until a better distinction can be made for supplier
		return Originator(p)
	}

	// no branch above assigns an entity kind, so a known packager is always reported as a person
	return personType, parseAndFormatPersonOrOrg(packager)
}
// nameEmailURLPattern matches npm-style author strings of the form "name <email> (url)", where the "<email>"
// and "(url)" sections are each optional. Results are exposed through the named groups "name", "email" and "url".
//
// The email domain accepts any number of dot-separated labels, each of which may contain hyphens (for example
// "lists.ubuntu.com" or "my-org.io"). A domain restricted to exactly two word-only labels would make the whole
// pattern fail to match such addresses, discarding the name along with the email.
var nameEmailURLPattern = regexp.MustCompile(`^(?P<name>[^<>()]*)( <(?P<email>[^@]+@[\w-]+(?:\.[\w-]+)+)>)?( \((?P<url>.*)\))?$`)
// parseAndFormatPersonOrOrg normalizes a free-form author string (such as "name <email> (url)") into the
// "name (email)" shape produced by formatPersonOrOrg. Any URL component is dropped.
//
// If nothing at all can be parsed out of a non-blank input (the string does not fit the expected shape), the
// whitespace-trimmed input is returned unchanged. Without this fallback a populated author field would be
// silently replaced by an empty value.
func parseAndFormatPersonOrOrg(s string) string {
	name, email, url := parseNameEmailURL(s)
	if name == "" && email == "" && url == "" {
		// unparseable (or blank) input: keep whatever the metadata provided rather than losing it
		return strings.TrimSpace(s)
	}
	return formatPersonOrOrg(name, email)
}
// parseNameEmailURL splits an author string into its name, email and URL components using
// nameEmailURLPattern. Every component is whitespace-trimmed, and any component that is absent is returned
// as the empty string.
//
// When no explicit email section was captured, a URL or (failing that) a name that looks like an email
// address per approximatesAsEmail is moved into the email result, leaving the field it came from empty.
func parseNameEmailURL(s string) (name, email, url string) {
	groups := internal.MatchNamedCaptureGroups(nameEmailURLPattern, s)

	name = strings.TrimSpace(groups["name"])
	email = strings.TrimSpace(groups["email"])
	url = strings.TrimSpace(groups["url"])

	if email != "" {
		return name, email, url
	}

	// no explicit email: see whether one was supplied in the url or name position instead (url takes priority)
	switch {
	case approximatesAsEmail(url):
		email, url = url, ""
	case approximatesAsEmail(name):
		email, name = name, ""
	}

	return name, email, url
}
// approximatesAsEmail reports whether s loosely resembles an email address: it must contain an "@" and have
// at least one "." somewhere after the first "@". No further validation is attempted.
func approximatesAsEmail(s string) bool {
	at := strings.IndexByte(s, '@')
	if at < 0 {
		return false
	}
	return strings.Contains(s[at+1:], ".")
}
// formatPersonOrOrg renders a name and email as a single SPDX-style value. Both inputs are whitespace-trimmed
// first. The result is "name (email)" when both are present, whichever one is present when only one is, and
// the empty string when neither is.
func formatPersonOrOrg(name, email string) string {
	name, email = strings.TrimSpace(name), strings.TrimSpace(email)

	switch {
	case name != "" && email != "":
		return fmt.Sprintf("%s (%s)", name, email)
	case name != "":
		return name
	default:
		// only the email remains as a candidate; it is "" when both inputs were blank
		return email
	}
}

View File

@ -0,0 +1,482 @@
package helpers
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/internal/packagemetadata"
"github.com/anchore/syft/syft/pkg"
)
// Test_OriginatorSupplier runs Originator and Supplier against one package per metadata type and compares the
// "<type>: <value>" rendering of each result (or the bare value when no type is returned) with the expectation.
// Every case is also registered with the completion tester.
func Test_OriginatorSupplier(t *testing.T) {
	// NOTE(review): the types listed here are presumably the metadata types deliberately left without a test
	// case below — confirm against packagemetadata.NewCompletionTester.
	completionTester := packagemetadata.NewCompletionTester(t,
		pkg.BinarySignature{},
		pkg.CocoaPodfileLockEntry{},
		pkg.ConanV1LockEntry{},
		pkg.ConanV2LockEntry{}, // the field Username might be the username of either the package originator or the supplier (unclear currently)
		pkg.ConanfileEntry{},
		pkg.ConaninfoEntry{},
		pkg.DartPubspecLockEntry{},
		pkg.DotnetDepsEntry{},
		pkg.ELFBinaryPackageNoteJSONPayload{},
		pkg.ElixirMixLockEntry{},
		pkg.ErlangRebarLockEntry{},
		pkg.GolangBinaryBuildinfoEntry{},
		pkg.GolangModuleEntry{},
		pkg.HackageStackYamlLockEntry{},
		pkg.HackageStackYamlEntry{},
		pkg.LinuxKernel{},
		pkg.MicrosoftKbPatch{},
		pkg.NixStoreEntry{},
		pkg.NpmPackageLockEntry{},
		pkg.PhpComposerInstalledEntry{},
		pkg.PhpPeclEntry{},
		pkg.PortageEntry{},
		pkg.PythonPipfileLockEntry{},
		pkg.PythonRequirementsEntry{},
		pkg.PythonPoetryLockEntry{},
		pkg.RustBinaryAuditEntry{},
		pkg.RustCargoLockEntry{},
		pkg.SwiftPackageManagerResolvedEntry{},
		pkg.YarnLockEntry{},
	)

	tests := []struct {
		name       string
		input      pkg.Package
		originator string
		supplier   string
	}{
		{
			// note: since this is an optional field, no value is preferred over NONE or NOASSERTION
			name:       "no metadata",
			input:      pkg.Package{},
			originator: "",
			supplier:   "",
		},
		{
			// note: since this is an optional field, no value is preferred over NONE or NOASSERTION
			name: "empty author on existing metadata",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "",
				},
			},
			originator: "",
			supplier:   "",
		},
		{
			name: "from apk",
			input: pkg.Package{
				Metadata: pkg.ApkDBEntry{
					Maintainer: "auth",
				},
			},
			originator: "Person: auth",
			supplier:   "Person: auth",
		},
		{
			// the packager is a supplier-only concept, so no originator is expected
			name: "from alpm",
			input: pkg.Package{
				Metadata: pkg.AlpmDBEntry{
					Packager: "someone",
				},
			},
			originator: "",
			supplier:   "Person: someone",
		},
		{
			name: "from dotnet -- PE binary",
			input: pkg.Package{
				Metadata: pkg.DotnetPortableExecutableEntry{
					CompanyName: "Microsoft Corporation",
				},
			},
			originator: "Organization: Microsoft Corporation",
			supplier:   "Organization: Microsoft Corporation",
		},
		{
			name: "from dpkg",
			input: pkg.Package{
				Metadata: pkg.DpkgDBEntry{
					Maintainer: "auth",
				},
			},
			originator: "Person: auth",
			supplier:   "Person: auth",
		},
		{
			name: "from gem",
			input: pkg.Package{
				Metadata: pkg.RubyGemspec{
					Authors: []string{
						"auth1",
						"auth2",
					},
				},
			},
			originator: "Person: auth1",
			supplier:   "Person: auth1",
		},
		{
			name: "from java -- spec > impl vendor in main manifest section",
			input: pkg.Package{
				Metadata: pkg.JavaArchive{
					Manifest: &pkg.JavaManifest{
						Main: pkg.KeyValues{
							{
								Key:   "Implementation-Vendor",
								Value: "auth-impl",
							},
							{
								Key:   "Specification-Vendor",
								Value: "auth-spec",
							},
						},
					},
				},
			},
			originator: "Person: auth-spec",
			supplier:   "Person: auth-spec",
		},
		{
			name: "from java -- fallback to impl vendor in main manifest section",
			input: pkg.Package{
				Metadata: pkg.JavaArchive{
					Manifest: &pkg.JavaManifest{
						Main: pkg.KeyValues{
							{
								Key:   "Implementation-Vendor",
								Value: "auth-impl",
							},
						},
					},
				},
			},
			originator: "Person: auth-impl",
			supplier:   "Person: auth-impl",
		},
		{
			name: "from java -- non-main manifest sections ignored",
			input: pkg.Package{
				Metadata: pkg.JavaArchive{
					Manifest: &pkg.JavaManifest{
						Sections: []pkg.KeyValues{
							{
								{
									Key:   "Implementation-Vendor",
									Value: "auth-impl",
								},
							},
						},
						Main: pkg.KeyValues{},
					},
				},
			},
			// note: empty!
		},
		{
			name: "from linux kernel module",
			input: pkg.Package{
				Metadata: pkg.LinuxKernelModule{
					Author: "auth",
				},
			},
			originator: "Person: auth",
			supplier:   "Person: auth",
		},
		{
			name: "from npm",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "auth",
				},
			},
			originator: "Person: auth",
			supplier:   "Person: auth",
		},
		{
			name: "from npm -- name, email, and url",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
				},
			},
			originator: "Person: Isaac Z. Schlueter (i@izs.me)",
			supplier:   "Person: Isaac Z. Schlueter (i@izs.me)",
		},
		{
			name: "from npm -- name, email",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "Isaac Z. Schlueter <i@izs.me>",
				},
			},
			originator: "Person: Isaac Z. Schlueter (i@izs.me)",
			supplier:   "Person: Isaac Z. Schlueter (i@izs.me)",
		},
		{
			name: "from php composer installed file",
			input: pkg.Package{
				Metadata: pkg.PhpComposerInstalledEntry{
					Authors: []pkg.PhpComposerAuthors{
						{
							Name:  "auth",
							Email: "me@auth.com",
						},
					},
				},
			},
			originator: "Person: auth (me@auth.com)",
			supplier:   "Person: auth (me@auth.com)",
		},
		{
			// named distinctly from the installed-file case above so the two subtests can be told apart
			name: "from php composer lock file",
			input: pkg.Package{
				Metadata: pkg.PhpComposerLockEntry{
					Authors: []pkg.PhpComposerAuthors{
						{
							Name:  "auth",
							Email: "me@auth.com",
						},
					},
				},
			},
			originator: "Person: auth (me@auth.com)",
			supplier:   "Person: auth (me@auth.com)",
		},
		{
			name: "from python - just name",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					Author: "auth",
				},
			},
			originator: "Person: auth",
			supplier:   "Person: auth",
		},
		{
			name: "from python - just email",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					AuthorEmail: "auth@auth.gov",
				},
			},
			originator: "Person: auth@auth.gov",
			supplier:   "Person: auth@auth.gov",
		},
		{
			name: "from python - both name and email",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					Author:      "auth",
					AuthorEmail: "auth@auth.gov",
				},
			},
			originator: "Person: auth (auth@auth.gov)",
			supplier:   "Person: auth (auth@auth.gov)",
		},
		{
			name: "from r -- maintainer > author",
			input: pkg.Package{
				Metadata: pkg.RDescription{
					Author:     "author",
					Maintainer: "maintainer",
				},
			},
			originator: "Person: maintainer",
			supplier:   "Person: maintainer",
		},
		{
			name: "from r -- fallback to author",
			input: pkg.Package{
				Metadata: pkg.RDescription{
					Author: "author",
				},
			},
			originator: "Person: author",
			supplier:   "Person: author",
		},
		{
			name: "from rpm archive",
			input: pkg.Package{
				Metadata: pkg.RpmArchive{
					Vendor: "auth",
				},
			},
			originator: "Organization: auth",
			supplier:   "Organization: auth",
		},
		{
			name: "from rpm DB",
			input: pkg.Package{
				Metadata: pkg.RpmDBEntry{
					Vendor: "auth",
				},
			},
			originator: "Organization: auth",
			supplier:   "Organization: auth",
		},
		{
			name: "from wordpress plugin",
			input: pkg.Package{
				Metadata: pkg.WordpressPluginEntry{
					Author: "auth",
				},
			},
			originator: "Organization: auth",
			supplier:   "Organization: auth",
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			completionTester.Tested(t, test.input.Metadata)

			// render as "<type>: <value>" only when a type was returned, mirroring the expectation strings
			typ, value := Originator(test.input)
			if typ != "" {
				value = typ + ": " + value
			}
			assert.Equal(t, test.originator, value)

			typ, value = Supplier(test.input)
			if typ != "" {
				value = typ + ": " + value
			}
			assert.Equal(t, test.supplier, value)
		})
	}
}
// Test_parseNameEmailUrl checks how parseNameEmailURL splits author strings into name, email and URL,
// including the cases where an email is supplied in the name or URL position.
func Test_parseNameEmailUrl(t *testing.T) {
	cases := []struct {
		name      string
		input     string
		wantName  string
		wantEmail string
		wantURL   string
	}{
		{
			name:  "empty",
			input: "",
		},
		{
			name:     "npm-like: name only",
			input:    "Isaac Z. Schlueter",
			wantName: "Isaac Z. Schlueter",
		},
		{
			name:      "npm-like: name and email",
			input:     "Ray Nos <bogus2@gmail.com>",
			wantName:  "Ray Nos",
			wantEmail: "bogus2@gmail.com",
		},
		{
			name:     "npm-like: name and url",
			input:    "Ray Nos (http://example.com)",
			wantName: "Ray Nos",
			wantURL:  "http://example.com",
		},
		{
			name:      "npm-like: name, email, and url",
			input:     "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)",
			wantName:  "Isaac Z. Schlueter",
			wantEmail: "i@izs.me",
			wantURL:   "http://blog.izs.me",
		},
		{
			name:      "mixed input: email only",
			input:     "i@izs.me",
			wantEmail: "i@izs.me",
		},
		{
			name:      "mixed input: email in url",
			input:     "my name (i@izs.me)",
			wantName:  "my name",
			wantEmail: "i@izs.me",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotName, gotEmail, gotURL := parseNameEmailURL(tc.input)

			assert.Equal(t, tc.wantName, gotName)
			assert.Equal(t, tc.wantEmail, gotEmail)
			assert.Equal(t, tc.wantURL, gotURL)
		})
	}
}
func Test_formatPersonOrOrg(t *testing.T) {
tests := []struct {
name string
input string
email string
want string
}{
{
name: "empty",
want: "",
},
{
name: "name only",
input: "Isaac Z. Schlueter",
want: "Isaac Z. Schlueter",
},
{
name: "email only",
email: "i@something.com",
want: "i@something.com",
},
{
name: "name and email",
input: "Isaac Z. Schlueter",
email: "i@something.com",
want: "Isaac Z. Schlueter (i@something.com)",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, formatPersonOrOrg(tt.input, tt.email))
})
}
}
func Test_approximatesAsEmail(t *testing.T) {
tests := []struct {
name string
input string
want bool
}{
{
name: "empty",
input: "",
want: false,
},
{
name: "no at",
input: "something.com",
want: false,
},
{
name: "no dot",
input: "something@com",
want: false,
},
{
name: "dot before at",
input: "something.com@nothing",
want: false,
},
{
name: "valid",
input: "something@nothing.com",
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, approximatesAsEmail(tt.input))
})
}
}

View File

@ -1,119 +0,0 @@
package helpers
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/pkg"
)
// Test_Originator checks the "<type>: <value>" rendering of Originator (or the bare value when no type is
// returned) for each metadata type it handles.
func Test_Originator(t *testing.T) {
	cases := []struct {
		name  string
		input pkg.Package
		want  string
	}{
		{
			// note: since this is an optional field, no value is preferred over NONE or NOASSERTION
			name:  "no metadata",
			input: pkg.Package{},
			want:  "",
		},
		{
			name: "from gem",
			input: pkg.Package{
				Metadata: pkg.RubyGemspec{
					Authors: []string{
						"auth1",
						"auth2",
					},
				},
			},
			want: "Person: auth1",
		},
		{
			name: "from npm",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "auth",
				},
			},
			want: "Person: auth",
		},
		{
			name: "from apk",
			input: pkg.Package{
				Metadata: pkg.ApkDBEntry{
					Maintainer: "auth",
				},
			},
			want: "Person: auth",
		},
		{
			name: "from python - just name",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					Author: "auth",
				},
			},
			want: "Person: auth",
		},
		{
			name: "from python - just email",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					AuthorEmail: "auth@auth.gov",
				},
			},
			want: "Person: auth@auth.gov",
		},
		{
			name: "from python - both name and email",
			input: pkg.Package{
				Metadata: pkg.PythonPackage{
					Author:      "auth",
					AuthorEmail: "auth@auth.gov",
				},
			},
			want: "Person: auth (auth@auth.gov)",
		},
		{
			name: "from rpm",
			input: pkg.Package{
				Metadata: pkg.RpmDBEntry{
					Vendor: "auth",
				},
			},
			want: "Organization: auth",
		},
		{
			name: "from dpkg",
			input: pkg.Package{
				Metadata: pkg.DpkgDBEntry{
					Maintainer: "auth",
				},
			},
			want: "Person: auth",
		},
		{
			// note: since this is an optional field, no value is preferred over NONE or NOASSERTION
			name: "empty",
			input: pkg.Package{
				Metadata: pkg.NpmPackage{
					Author: "",
				},
			},
			want: "",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			kind, got := Originator(tc.input)
			// prefix the value with its type only when a type was returned, mirroring the expectation strings
			if kind != "" {
				got = kind + ": " + got
			}
			assert.Equal(t, tc.want, got)
		})
	}
}

View File

@ -1,44 +0,0 @@
package helpers
import (
"fmt"
"github.com/anchore/syft/syft/pkg"
)
// Originator derives the SPDX package originator from the package metadata. The result needs to conform to
// the SPDX spec here:
// https://spdx.github.io/spdx-spec/package-information/#76-package-originator-field
// Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
//
// The return values are <type>, <value>. Both are empty when hasMetadata reports false for the package or
// when its metadata type is not handled. The type defaults to "Person" whenever a non-empty value was found
// without an explicit type.
func Originator(p pkg.Package) (string, string) {
	if !hasMetadata(p) {
		return "", ""
	}

	var kind, who string

	switch m := p.Metadata.(type) {
	case pkg.ApkDBEntry:
		who = m.Maintainer
	case pkg.DpkgDBEntry:
		who = m.Maintainer
	case pkg.NpmPackage:
		who = m.Author
	case pkg.PythonPackage:
		// prefer "name (email)", degrading to whichever of the two is available
		switch {
		case m.Author == "":
			who = m.AuthorEmail
		case m.AuthorEmail != "":
			who = fmt.Sprintf("%s (%s)", m.Author, m.AuthorEmail)
		default:
			who = m.Author
		}
	case pkg.RubyGemspec:
		// only the first listed author is used
		if len(m.Authors) > 0 {
			who = m.Authors[0]
		}
	case pkg.RpmDBEntry:
		kind = "Organization"
		who = m.Vendor
	}

	// anything found without an explicit type is treated as a person
	if who != "" && kind == "" {
		kind = "Person"
	}

	return kind, who
}

View File

@ -3,9 +3,11 @@ package pkg
type RDescription struct { type RDescription struct {
/* /*
Fields chosen by: Fields chosen by:
docker run --rm -it rocker/r-ver bash docker run --rm -it rocker/r-ver bash
$ install2.r ggplot2 # has a lot of dependencies $ install2.r ggplot2 # has a lot of dependencies
$ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep -v '^\s' | cut -d ':' -f 1 | sort | uniq -c | sort -nr $ find /usr/local/lib/R -name DESCRIPTION | xargs cat | grep -v '^\s' | cut -d ':' -f 1 | sort | uniq -c | sort -nr
For more information on the DESCRIPTION file see https://r-pkgs.org/description.html
*/ */
Title string `json:"title,omitempty"` Title string `json:"title,omitempty"`
Description string `json:"description,omitempty"` Description string `json:"description,omitempty"`