From 1350d6c5bfd55a96b868a6f8ff82a4b026a278a5 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 19 Jan 2022 17:30:29 -0500 Subject: [PATCH] Improve package URL support (#754) * rename npm metadata struct Signed-off-by: Alex Goodman * improve os package URLs Signed-off-by: Alex Goodman * improve language package URLs Signed-off-by: Alex Goodman * wire up composer pURL method Signed-off-by: Alex Goodman --- syft/pkg/apk_metadata.go | 27 +-- syft/pkg/apk_metadata_test.go | 38 +++- syft/pkg/cataloger/catalog.go | 2 +- syft/pkg/cataloger/package_url.go | 49 ----- syft/pkg/cataloger/package_url_test.go | 166 --------------- syft/pkg/dpkg_metadata.go | 28 +-- syft/pkg/dpkg_metadata_test.go | 39 ++-- syft/pkg/java_metadata.go | 6 +- syft/pkg/java_metadata_test.go | 2 +- ...tadata.go => npm_package_json_metadata.go} | 0 syft/pkg/php_composer_json_metadata.go | 5 +- syft/pkg/php_composer_json_metadata_test.go | 17 +- syft/pkg/python_package_metadata.go | 13 +- syft/pkg/python_package_metadata_test.go | 48 +++++ syft/pkg/rpmdb_metadata.go | 37 ++-- syft/pkg/rpmdb_metadata_test.go | 34 ++- syft/pkg/url.go | 86 ++++++++ syft/pkg/url_test.go | 200 ++++++++++++++++++ 18 files changed, 501 insertions(+), 296 deletions(-) delete mode 100644 syft/pkg/cataloger/package_url.go delete mode 100644 syft/pkg/cataloger/package_url_test.go rename syft/pkg/{npm_metadata.go => npm_package_json_metadata.go} (100%) create mode 100644 syft/pkg/url.go create mode 100644 syft/pkg/url_test.go diff --git a/syft/pkg/apk_metadata.go b/syft/pkg/apk_metadata.go index c967255ab..7c3db6bb2 100644 --- a/syft/pkg/apk_metadata.go +++ b/syft/pkg/apk_metadata.go @@ -3,15 +3,18 @@ package pkg import ( "sort" - "github.com/anchore/syft/syft/file" - "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" "github.com/scylladb/go-set/strset" ) const ApkDBGlob = "**/lib/apk/db/installed" -var _ FileOwner = (*ApkMetadata)(nil) +var ( + _ FileOwner = (*ApkMetadata)(nil) + _ urlIdentifier = (*ApkMetadata)(nil) +) // ApkMetadata represents all captured data for a Alpine DB package entry. // See the following sources for more information: @@ -45,22 +48,22 @@ type ApkFileRecord struct { } // PackageURL returns the PURL for the specific Alpine package (see https://github.com/package-url/purl-spec) -func (m ApkMetadata) PackageURL() string { - pURL := packageurl.NewPackageURL( +func (m ApkMetadata) PackageURL(distro *linux.Release) string { + return packageurl.NewPackageURL( // note: this is currently a candidate and not technically within spec // see https://github.com/package-url/purl-spec#other-candidate-types-to-define "alpine", "", m.Package, m.Version, - packageurl.Qualifiers{ - { - Key: "arch", - Value: m.Architecture, + purlQualifiers( + map[string]string{ + purlArchQualifier: m.Architecture, }, - }, - "") - return pURL.ToString() + distro, + ), + "", + ).ToString() } func (m ApkMetadata) OwnedFiles() (result []string) { diff --git a/syft/pkg/apk_metadata_test.go b/syft/pkg/apk_metadata_test.go index da22b30a3..774b94147 100644 --- a/syft/pkg/apk_metadata_test.go +++ b/syft/pkg/apk_metadata_test.go @@ -1,6 +1,7 @@ package pkg import ( + "github.com/anchore/syft/syft/linux" "strings" "testing" @@ -11,16 +12,35 @@ import ( func TestApkMetadata_pURL(t *testing.T) { tests := []struct { + name string metadata ApkMetadata + distro linux.Release expected string }{ { + name: "gocase", metadata: ApkMetadata{ Package: "p", Version: "v", Architecture: "a", }, - expected: "pkg:alpine/p@v?arch=a", + distro: linux.Release{ + ID: "alpine", + VersionID: "3.4.6", + }, + expected: "pkg:alpine/p@v?arch=a&distro=alpine-3.4.6", + }, + { + name: "missing architecure", + metadata: ApkMetadata{ + Package: "p", + Version: "v", + }, + distro: linux.Release{ + ID: "alpine", + VersionID: "3.4.6", + }, + expected: "pkg:alpine/p@v?distro=alpine-3.4.6", }, // verify #351 { @@ -29,7 +49,11 @@ func TestApkMetadata_pURL(t *testing.T) { Version: "v84", Architecture: "am86", }, - expected: "pkg:alpine/g++@v84?arch=am86", + distro: linux.Release{ + ID: "alpine", + VersionID: "3.4.6", + }, + expected: "pkg:alpine/g++@v84?arch=am86&distro=alpine-3.4.6", }, { metadata: ApkMetadata{ @@ -37,13 +61,17 @@ func TestApkMetadata_pURL(t *testing.T) { Version: "v84", Architecture: "am86", }, - expected: "pkg:alpine/g%20plus%20plus@v84?arch=am86", + distro: linux.Release{ + ID: "alpine", + VersionID: "3.15.0", + }, + expected: "pkg:alpine/g%20plus%20plus@v84?arch=am86&distro=alpine-3.15.0", }, } for _, test := range tests { - t.Run(test.expected, func(t *testing.T) { - actual := test.metadata.PackageURL() + t.Run(test.name, func(t *testing.T) { + actual := test.metadata.PackageURL(&test.distro) if actual != test.expected { dmp := diffmatchpatch.New() diffs := dmp.DiffMain(test.expected, actual, true) diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index 928771818..dfd242a59 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -68,7 +68,7 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers .. p.CPEs = cpe.Generate(p) // generate PURL (note: this is excluded from package ID, so is safe to mutate) - p.PURL = generatePackageURL(p, release) + p.PURL = pkg.URL(p, release) // create file-to-package relationships for files owned by the package owningRelationships, err := packageFileOwnershipRelationships(p, resolver) diff --git a/syft/pkg/cataloger/package_url.go b/syft/pkg/cataloger/package_url.go deleted file mode 100644 index 3fe4eb42d..000000000 --- a/syft/pkg/cataloger/package_url.go +++ /dev/null @@ -1,49 +0,0 @@ -package cataloger - -import ( - "regexp" - "strings" - - "github.com/anchore/packageurl-go" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" -) - -// generatePackageURL returns a package-URL representation of the given package (see https://github.com/package-url/purl-spec) -func generatePackageURL(p pkg.Package, release *linux.Release) string { - // default to pURLs on the metadata - if p.Metadata != nil { - if i, ok := p.Metadata.(interface{ PackageURL() string }); ok { - return i.PackageURL() - } else if i, ok := p.Metadata.(interface{ PackageURL(*linux.Release) string }); ok { - return i.PackageURL(release) - } - } - - var purlType = p.Type.PackageURLType() - var name = p.Name - var namespace = "" - - switch { - case purlType == "": - // there is no purl type, don't attempt to craft a purl - // TODO: should this be a "generic" purl type instead? - return "" - case p.Type == pkg.GoModulePkg: - re := regexp.MustCompile(`(/)[^/]*$`) - fields := re.Split(p.Name, -1) - namespace = fields[0] - name = strings.TrimPrefix(p.Name, namespace+"/") - } - - // generate a purl from the package data - pURL := packageurl.NewPackageURL( - purlType, - namespace, - name, - p.Version, - nil, - "") - - return pURL.ToString() -} diff --git a/syft/pkg/cataloger/package_url_test.go b/syft/pkg/cataloger/package_url_test.go deleted file mode 100644 index d7116d446..000000000 --- a/syft/pkg/cataloger/package_url_test.go +++ /dev/null @@ -1,166 +0,0 @@ -package cataloger - -import ( - "testing" - - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" - "github.com/sergi/go-diff/diffmatchpatch" -) - -func TestPackageURL(t *testing.T) { - tests := []struct { - name string - pkg pkg.Package - distro *linux.Release - expected string - }{ - { - name: "golang", - pkg: pkg.Package{ - Name: "github.com/anchore/syft", - Version: "v0.1.0", - Type: pkg.GoModulePkg, - }, - expected: "pkg:golang/github.com/anchore/syft@v0.1.0", - }, - { - name: "pip with vcs url", - pkg: pkg.Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: pkg.PythonPkg, - Metadata: pkg.PythonPackageMetadata{ - Name: "name", - Version: "v0.1.0", - DirectURLOrigin: &pkg.PythonDirectURLOriginInfo{ - VCS: "git", - URL: "https://github.com/test/test.git", - CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - }, - }, - }, - expected: "pkg:pypi/name@v0.1.0?vcs_url=git+https:%2F%2Fgithub.com%2Ftest%2Ftest.git@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - }, - { - name: "pip without vcs url", - pkg: pkg.Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: pkg.PythonPkg, - Metadata: pkg.PythonPackageMetadata{ - Name: "name", - Version: "v0.1.0", - }, - }, - expected: "pkg:pypi/name@v0.1.0", - }, - { - name: "gem", - pkg: pkg.Package{ - Name: "name", - Version: "v0.1.0", - Type: pkg.GemPkg, - }, - expected: "pkg:gem/name@v0.1.0", - }, - { - name: "npm", - pkg: pkg.Package{ - Name: "name", - Version: "v0.1.0", - Type: pkg.NpmPkg, - }, - expected: "pkg:npm/name@v0.1.0", - }, - { - name: "deb with arch", - distro: &linux.Release{ - ID: "ubuntu", - }, - pkg: pkg.Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: pkg.DebPkg, - Metadata: pkg.DpkgMetadata{ - Package: "name", - Version: "v0.1.0", - Architecture: "amd64", - }, - }, - expected: "pkg:deb/ubuntu/name@v0.1.0?arch=amd64", - }, - { - name: "deb with epoch", - distro: &linux.Release{ - ID: "centos", - }, - pkg: pkg.Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: pkg.RpmPkg, - Metadata: pkg.RpmdbMetadata{ - Name: "name", - Version: "0.1.0", - Epoch: intRef(2), - Arch: "amd64", - Release: "3", - }, - }, - expected: "pkg:rpm/centos/name@0.1.0-3?arch=amd64&epoch=2", - }, - { - name: "deb with nil epoch", - distro: &linux.Release{ - ID: "centos", - }, - pkg: pkg.Package{ - Name: "bad-name", - Version: "bad-v0.1.0", - Type: pkg.RpmPkg, - Metadata: pkg.RpmdbMetadata{ - Name: "name", - Version: "0.1.0", - Epoch: nil, - Arch: "amd64", - Release: "3", - }, - }, - expected: "pkg:rpm/centos/name@0.1.0-3?arch=amd64", - }, - { - name: "deb with unknown distro", - distro: nil, - pkg: pkg.Package{ - Name: "name", - Version: "v0.1.0", - Type: pkg.DebPkg, - }, - expected: "pkg:deb/name@v0.1.0", - }, - { - name: "cargo", - pkg: pkg.Package{ - Name: "name", - Version: "v0.1.0", - Type: pkg.RustPkg, - }, - expected: "pkg:cargo/name@v0.1.0", - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - actual := generatePackageURL(test.pkg, test.distro) - if actual != test.expected { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(test.expected, actual, true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) - } - }) - } -} - -func intRef(i int) *int { - return &i -} diff --git a/syft/pkg/dpkg_metadata.go b/syft/pkg/dpkg_metadata.go index 0063ef231..d2687c0eb 100644 --- a/syft/pkg/dpkg_metadata.go +++ b/syft/pkg/dpkg_metadata.go @@ -12,7 +12,10 @@ import ( const DpkgDBGlob = "**/var/lib/dpkg/{status,status.d/**}" -var _ FileOwner = (*DpkgMetadata)(nil) +var ( + _ FileOwner = (*DpkgMetadata)(nil) + _ urlIdentifier = (*DpkgMetadata)(nil) +) // DpkgMetadata represents all captured data for a Debian package DB entry; available fields are described // at http://manpages.ubuntu.com/manpages/xenial/man1/dpkg-query.1.html in the --showformat section. @@ -36,24 +39,25 @@ type DpkgFileRecord struct { // PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec) func (m DpkgMetadata) PackageURL(distro *linux.Release) string { - if distro == nil { - return "" + var namespace string + if distro != nil { + namespace = distro.ID } - pURL := packageurl.NewPackageURL( + return packageurl.NewPackageURL( // TODO: replace with `packageurl.TypeDebian` upon merge of https://github.com/package-url/packageurl-go/pull/21 // TODO: or, since we're now using an Anchore fork of this module, we could do this sooner. "deb", - distro.ID, + namespace, m.Package, m.Version, - packageurl.Qualifiers{ - { - Key: "arch", - Value: m.Architecture, + purlQualifiers( + map[string]string{ + purlArchQualifier: m.Architecture, }, - }, - "") - return pURL.ToString() + distro, + ), + "", + ).ToString() } func (m DpkgMetadata) OwnedFiles() (result []string) { diff --git a/syft/pkg/dpkg_metadata_test.go b/syft/pkg/dpkg_metadata_test.go index 340b706a9..f056a72ff 100644 --- a/syft/pkg/dpkg_metadata_test.go +++ b/syft/pkg/dpkg_metadata_test.go @@ -12,25 +12,29 @@ import ( func TestDpkgMetadata_pURL(t *testing.T) { tests := []struct { - distro linux.Release + name string + distro *linux.Release metadata DpkgMetadata expected string }{ { - distro: linux.Release{ - ID: "debian", + name: "go case", + distro: &linux.Release{ + ID: "debian", + VersionID: "11", }, metadata: DpkgMetadata{ - Package: "p", - Source: "s", - Version: "v", - Architecture: "a", + Package: "p", + Source: "s", + Version: "v", }, - expected: "pkg:deb/debian/p@v?arch=a", + expected: "pkg:deb/debian/p@v?distro=debian-11", }, { - distro: linux.Release{ - ID: "ubuntu", + name: "with arch info", + distro: &linux.Release{ + ID: "ubuntu", + VersionID: "16.04", }, metadata: DpkgMetadata{ Package: "p", @@ -38,13 +42,22 @@ func TestDpkgMetadata_pURL(t *testing.T) { Version: "v", Architecture: "a", }, - expected: "pkg:deb/ubuntu/p@v?arch=a", + expected: "pkg:deb/ubuntu/p@v?arch=a&distro=ubuntu-16.04", + }, + { + name: "missing distro", + metadata: DpkgMetadata{ + Package: "p", + Source: "s", + Version: "v", + }, + expected: "pkg:deb/p@v", }, } for _, test := range tests { - t.Run(test.expected, func(t *testing.T) { - actual := test.metadata.PackageURL(&test.distro) + t.Run(test.name, func(t *testing.T) { + actual := test.metadata.PackageURL(test.distro) if actual != test.expected { dmp := diffmatchpatch.New() diffs := dmp.DiffMain(test.expected, actual, true) diff --git a/syft/pkg/java_metadata.go b/syft/pkg/java_metadata.go index ceaf476e9..9dcd32e1e 100644 --- a/syft/pkg/java_metadata.go +++ b/syft/pkg/java_metadata.go @@ -3,10 +3,14 @@ package pkg import ( "strings" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/packageurl-go" "github.com/anchore/syft/internal" ) +var _ urlIdentifier = (*JavaMetadata)(nil) + var JenkinsPluginPomPropertiesGroupIDs = []string{ "io.jenkins.plugins", "org.jenkins.plugins", @@ -69,7 +73,7 @@ type JavaManifest struct { } // PackageURL returns the PURL for the specific Alpine package (see https://github.com/package-url/purl-spec) -func (m JavaMetadata) PackageURL() string { +func (m JavaMetadata) PackageURL(_ *linux.Release) string { if m.PomProperties != nil { pURL := packageurl.NewPackageURL( packageurl.TypeMaven, diff --git a/syft/pkg/java_metadata_test.go b/syft/pkg/java_metadata_test.go index 8dcbded88..07a6ca977 100644 --- a/syft/pkg/java_metadata_test.go +++ b/syft/pkg/java_metadata_test.go @@ -136,7 +136,7 @@ func TestJavaMetadata_pURL(t *testing.T) { for _, test := range tests { t.Run(test.expected, func(t *testing.T) { - actual := test.metadata.PackageURL() + actual := test.metadata.PackageURL(nil) if actual != test.expected { dmp := diffmatchpatch.New() diffs := dmp.DiffMain(test.expected, actual, true) diff --git a/syft/pkg/npm_metadata.go b/syft/pkg/npm_package_json_metadata.go similarity index 100% rename from syft/pkg/npm_metadata.go rename to syft/pkg/npm_package_json_metadata.go diff --git a/syft/pkg/php_composer_json_metadata.go b/syft/pkg/php_composer_json_metadata.go index 3b3e88f4f..11b529d2b 100644 --- a/syft/pkg/php_composer_json_metadata.go +++ b/syft/pkg/php_composer_json_metadata.go @@ -4,8 +4,11 @@ import ( "strings" "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/linux" ) +var _ urlIdentifier = (*PhpComposerJSONMetadata)(nil) + // PhpComposerJSONMetadata represents information found from composer v1/v2 "installed.json" files as well as composer.lock files type PhpComposerJSONMetadata struct { Name string `json:"name"` @@ -40,7 +43,7 @@ type PhpComposerAuthors struct { Homepage string `json:"homepage,omitempty"` } -func (m PhpComposerJSONMetadata) PackageURL() string { +func (m PhpComposerJSONMetadata) PackageURL(_ *linux.Release) string { var name, vendor string fields := strings.Split(m.Name, "/") switch len(fields) { diff --git a/syft/pkg/php_composer_json_metadata_test.go b/syft/pkg/php_composer_json_metadata_test.go index c7a1acf8b..81453a7d0 100644 --- a/syft/pkg/php_composer_json_metadata_test.go +++ b/syft/pkg/php_composer_json_metadata_test.go @@ -20,7 +20,8 @@ func TestPhpComposerJsonMetadata_pURL(t *testing.T) { Version: "1.0.1", }, expected: "pkg:composer/ven/name@1.0.1", - }, { + }, + { name: "name with slashes (invalid)", metadata: PhpComposerJSONMetadata{ Name: "ven/name/component", @@ -36,11 +37,23 @@ func TestPhpComposerJsonMetadata_pURL(t *testing.T) { }, expected: "pkg:composer/name@1.0.1", }, + { + name: "ignores distro", + distro: &linux.Release{ + ID: "rhel", + VersionID: "8.4", + }, + metadata: PhpComposerJSONMetadata{ + Name: "ven/name", + Version: "1.0.1", + }, + expected: "pkg:composer/ven/name@1.0.1", + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual := test.metadata.PackageURL() + actual := test.metadata.PackageURL(test.distro) if actual != test.expected { dmp := diffmatchpatch.New() diffs := dmp.DiffMain(test.expected, actual, true) diff --git a/syft/pkg/python_package_metadata.go b/syft/pkg/python_package_metadata.go index 1b1418e73..25b311cf9 100644 --- a/syft/pkg/python_package_metadata.go +++ b/syft/pkg/python_package_metadata.go @@ -4,11 +4,16 @@ import ( "fmt" "sort" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/packageurl-go" "github.com/scylladb/go-set/strset" ) -var _ FileOwner = (*PythonPackageMetadata)(nil) +var ( + _ FileOwner = (*PythonPackageMetadata)(nil) + _ urlIdentifier = (*PythonPackageMetadata)(nil) +) // PythonFileDigest represents the file metadata for a single file attributed to a python package. type PythonFileDigest struct { @@ -76,7 +81,7 @@ func (m PythonPackageMetadata) OwnedFiles() (result []string) { return result } -func (m PythonPackageMetadata) PackageURL() string { +func (m PythonPackageMetadata) PackageURL(_ *linux.Release) string { // generate a purl from the package data pURL := packageurl.NewPackageURL( packageurl.TypePyPi, @@ -101,7 +106,7 @@ func (p PythonDirectURLOriginInfo) vcsURLQualifier() packageurl.Qualifiers { if p.VCS != "" { // Taken from https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs // packageurl-go still doesn't support all qualifier names - return packageurl.Qualifiers{{Key: "vcs_url", Value: fmt.Sprintf("%s+%s@%s", p.VCS, p.URL, p.CommitID)}} + return packageurl.Qualifiers{{Key: purlVCSURL, Value: fmt.Sprintf("%s+%s@%s", p.VCS, p.URL, p.CommitID)}} } - return packageurl.Qualifiers{} + return nil } diff --git a/syft/pkg/python_package_metadata_test.go b/syft/pkg/python_package_metadata_test.go index 686980305..3b7d58b8e 100644 --- a/syft/pkg/python_package_metadata_test.go +++ b/syft/pkg/python_package_metadata_test.go @@ -1,12 +1,60 @@ package pkg import ( + "github.com/anchore/syft/syft/linux" + "github.com/sergi/go-diff/diffmatchpatch" "strings" "testing" "github.com/go-test/deep" ) +func TestPythonPackageMetadata_pURL(t *testing.T) { + tests := []struct { + name string + distro *linux.Release + metadata PythonPackageMetadata + expected string + }{ + { + name: "with vcs info", + metadata: PythonPackageMetadata{ + Name: "name", + Version: "v0.1.0", + DirectURLOrigin: &PythonDirectURLOriginInfo{ + VCS: "git", + URL: "https://github.com/test/test.git", + CommitID: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + }, + expected: "pkg:pypi/name@v0.1.0?vcs_url=git+https:%2F%2Fgithub.com%2Ftest%2Ftest.git@aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + }, + { + name: "should not respond to release info", + distro: &linux.Release{ + ID: "rhel", + VersionID: "8.4", + }, + metadata: PythonPackageMetadata{ + Name: "name", + Version: "v0.1.0", + }, + expected: "pkg:pypi/name@v0.1.0", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + actual := test.metadata.PackageURL(test.distro) + if actual != test.expected { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(test.expected, actual, true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) + } + }) + } +} + func TestPythonMetadata_FileOwner(t *testing.T) { tests := []struct { metadata PythonPackageMetadata diff --git a/syft/pkg/rpmdb_metadata.go b/syft/pkg/rpmdb_metadata.go index faf1abbf9..e1f6a1786 100644 --- a/syft/pkg/rpmdb_metadata.go +++ b/syft/pkg/rpmdb_metadata.go @@ -15,7 +15,10 @@ import ( const RpmDBGlob = "**/var/lib/rpm/Packages" -var _ FileOwner = (*RpmdbMetadata)(nil) +var ( + _ FileOwner = (*RpmdbMetadata)(nil) + _ urlIdentifier = (*RpmdbMetadata)(nil) +) // RpmdbMetadata represents all captured data for a RPM DB package entry. type RpmdbMetadata struct { @@ -47,36 +50,32 @@ type RpmdbFileMode uint16 // PackageURL returns the PURL for the specific RHEL package (see https://github.com/package-url/purl-spec) func (m RpmdbMetadata) PackageURL(distro *linux.Release) string { - if distro == nil { - return "" + var namespace string + if distro != nil { + namespace = distro.ID } - qualifiers := packageurl.Qualifiers{ - { - Key: "arch", - Value: m.Arch, - }, + qualifiers := map[string]string{ + purlArchQualifier: m.Arch, } if m.Epoch != nil { - qualifiers = append(qualifiers, - packageurl.Qualifier{ - Key: "epoch", - Value: strconv.Itoa(*m.Epoch), - }, - ) + qualifiers[purlEpochQualifier] = strconv.Itoa(*m.Epoch) } - pURL := packageurl.NewPackageURL( + return packageurl.NewPackageURL( packageurl.TypeRPM, - distro.ID, + namespace, m.Name, // for purl the epoch is a qualifier, not part of the version // see https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst under the RPM section fmt.Sprintf("%s-%s", m.Version, m.Release), - qualifiers, - "") - return pURL.ToString() + purlQualifiers( + qualifiers, + distro, + ), + "", + ).ToString() } func (m RpmdbMetadata) OwnedFiles() (result []string) { diff --git a/syft/pkg/rpmdb_metadata_test.go b/syft/pkg/rpmdb_metadata_test.go index faa9d4470..5c7b3bbb8 100644 --- a/syft/pkg/rpmdb_metadata_test.go +++ b/syft/pkg/rpmdb_metadata_test.go @@ -12,13 +12,16 @@ import ( func TestRpmMetadata_pURL(t *testing.T) { tests := []struct { - distro linux.Release + name string + distro *linux.Release metadata RpmdbMetadata expected string }{ { - distro: linux.Release{ - ID: "centos", + name: "with arch and epoch", + distro: &linux.Release{ + ID: "centos", + VersionID: "7", }, metadata: RpmdbMetadata{ Name: "p", @@ -27,26 +30,37 @@ func TestRpmMetadata_pURL(t *testing.T) { Release: "r", Epoch: intRef(1), }, - expected: "pkg:rpm/centos/p@v-r?arch=a&epoch=1", + expected: "pkg:rpm/centos/p@v-r?arch=a&epoch=1&distro=centos-7", }, { - distro: linux.Release{ - ID: "rhel", + name: "go case", + distro: &linux.Release{ + ID: "rhel", + VersionID: "8.4", }, metadata: RpmdbMetadata{ Name: "p", Version: "v", - Arch: "a", Release: "r", Epoch: nil, }, - expected: "pkg:rpm/rhel/p@v-r?arch=a", + expected: "pkg:rpm/rhel/p@v-r?distro=rhel-8.4", + }, + { + name: "missing distro", + metadata: RpmdbMetadata{ + Name: "p", + Version: "v", + Release: "r", + Epoch: nil, + }, + expected: "pkg:rpm/p@v-r", }, } for _, test := range tests { - t.Run(test.expected, func(t *testing.T) { - actual := test.metadata.PackageURL(&test.distro) + t.Run(test.name, func(t *testing.T) { + actual := test.metadata.PackageURL(test.distro) if actual != test.expected { dmp := diffmatchpatch.New() diffs := dmp.DiffMain(test.expected, actual, true) diff --git a/syft/pkg/url.go b/syft/pkg/url.go new file mode 100644 index 000000000..5187b6a0a --- /dev/null +++ b/syft/pkg/url.go @@ -0,0 +1,86 @@ +package pkg + +import ( + "fmt" + "regexp" + "sort" + "strings" + + "github.com/anchore/packageurl-go" + "github.com/anchore/syft/syft/linux" +) + +const ( + purlArchQualifier = "arch" + purlDistroQualifier = "distro" + purlEpochQualifier = "epoch" + purlVCSURL = "vcs_url" +) + +type urlIdentifier interface { + PackageURL(*linux.Release) string +} + +func URL(p Package, release *linux.Release) string { + if p.Metadata != nil { + if i, ok := p.Metadata.(urlIdentifier); ok { + return i.PackageURL(release) + } + } + + // the remaining cases are primarily reserved for packages without metadata struct instances + + var purlType = p.Type.PackageURLType() + var name = p.Name + var namespace = "" + + switch { + case purlType == "": + // there is no purl type, don't attempt to craft a purl + // TODO: should this be a "generic" purl type instead? + return "" + case p.Type == GoModulePkg: + re := regexp.MustCompile(`(/)[^/]*$`) + fields := re.Split(p.Name, -1) + namespace = fields[0] + name = strings.TrimPrefix(p.Name, namespace+"/") + } + + // generate a purl from the package data + return packageurl.NewPackageURL( + purlType, + namespace, + name, + p.Version, + nil, + "", + ).ToString() +} + +func purlQualifiers(vars map[string]string, release *linux.Release) (q packageurl.Qualifiers) { + keys := make([]string, 0, len(vars)) + for k := range vars { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + val := vars[k] + if val == "" { + continue + } + q = append(q, packageurl.Qualifier{ + Key: k, + Value: vars[k], + }) + } + + if release != nil && release.ID != "" && release.VersionID != "" { + q = append(q, packageurl.Qualifier{ + Key: purlDistroQualifier, + Value: fmt.Sprintf("%s-%s", release.ID, release.VersionID), + }) + } + + return q +} diff --git a/syft/pkg/url_test.go b/syft/pkg/url_test.go new file mode 100644 index 000000000..2abae9368 --- /dev/null +++ b/syft/pkg/url_test.go @@ -0,0 +1,200 @@ +package pkg + +import ( + "testing" + + "github.com/anchore/syft/syft/linux" + "github.com/scylladb/go-set/strset" + "github.com/sergi/go-diff/diffmatchpatch" + "github.com/stretchr/testify/assert" +) + +func TestPackageURL(t *testing.T) { + tests := []struct { + name string + pkg Package + distro *linux.Release + expected string + }{ + { + name: "golang", + pkg: Package{ + Name: "github.com/anchore/syft", + Version: "v0.1.0", + Type: GoModulePkg, + }, + expected: "pkg:golang/github.com/anchore/syft@v0.1.0", + }, + { + name: "python", + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: PythonPkg, + Metadata: PythonPackageMetadata{ + Name: "name", + Version: "v0.1.0", + }, + }, + expected: "pkg:pypi/name@v0.1.0", + }, + { + name: "gem", + pkg: Package{ + Name: "name", + Version: "v0.1.0", + Type: GemPkg, + }, + expected: "pkg:gem/name@v0.1.0", + }, + { + name: "npm", + pkg: Package{ + Name: "name", + Version: "v0.1.0", + Type: NpmPkg, + }, + expected: "pkg:npm/name@v0.1.0", + }, + { + name: "deb", + distro: &linux.Release{ + ID: "ubuntu", + VersionID: "20.04", + }, + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: DebPkg, + Metadata: DpkgMetadata{ + Package: "name", + Version: "v0.1.0", + Architecture: "amd64", + }, + }, + expected: "pkg:deb/ubuntu/name@v0.1.0?arch=amd64&distro=ubuntu-20.04", + }, + { + name: "rpm", + distro: &linux.Release{ + ID: "centos", + VersionID: "7", + }, + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: RpmPkg, + Metadata: RpmdbMetadata{ + Name: "name", + Version: "0.1.0", + Epoch: intRef(2), + Arch: "amd64", + Release: "3", + }, + }, + expected: "pkg:rpm/centos/name@0.1.0-3?arch=amd64&epoch=2&distro=centos-7", + }, + { + name: "cargo", + pkg: Package{ + Name: "name", + Version: "v0.1.0", + Type: RustPkg, + }, + expected: "pkg:cargo/name@v0.1.0", + }, + { + name: "apk", + distro: &linux.Release{ + ID: "alpine", + VersionID: "3.4.6", + }, + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: ApkPkg, + Metadata: ApkMetadata{ + Package: "name", + Version: "v0.1.0", + Architecture: "amd64", + }, + }, + expected: "pkg:alpine/name@v0.1.0?arch=amd64&distro=alpine-3.4.6", + }, + { + name: "php-composer", + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: PhpComposerPkg, + Metadata: PhpComposerJSONMetadata{ + Name: "vendor/name", + Version: "2.0.1", + }, + }, + expected: "pkg:composer/vendor/name@2.0.1", + }, + { + name: "java", + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: JavaPkg, + Metadata: JavaMetadata{ + PomProperties: &PomProperties{ + Path: "p", + Name: "n", + GroupID: "g.id", + ArtifactID: "a", + Version: "v", + }, + }, + }, + + expected: "pkg:maven/g.id/a@v", + }, + { + name: "jenkins-plugin", + pkg: Package{ + Name: "bad-name", + Version: "bad-v0.1.0", + Type: JenkinsPluginPkg, + Metadata: JavaMetadata{ + PomProperties: &PomProperties{ + Path: "p", + Name: "n", + GroupID: "g.id", + ArtifactID: "a", + Version: "v", + }, + }, + }, + + expected: "pkg:maven/g.id/a@v", + }, + } + + var pkgTypes []string + var expectedTypes = strset.New() + for _, ty := range AllPkgs { + expectedTypes.Add(string(ty)) + } + + // testing microsoft packages is not valid for purl at this time + expectedTypes.Remove(string(KbPkg)) + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.pkg.Type != "" { + pkgTypes = append(pkgTypes, string(test.pkg.Type)) + } + actual := URL(test.pkg, test.distro) + if actual != test.expected { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(test.expected, actual, true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) + } + }) + } + assert.ElementsMatch(t, expectedTypes.List(), pkgTypes, "missing one or more package types to test against (maybe a package type was added?)") +}