From fbda21f4f42dad230342feed6f50e3155d5670a0 Mon Sep 17 00:00:00 2001 From: Weston Steimel Date: Mon, 27 Feb 2023 18:16:04 +0000 Subject: [PATCH] fix: further improvements to CPE generation for apk packages (#1623) * fix: consider upstream logic during apk cpe gen * fix: correct apk CPE for go * fix: correct apk CPE for ruby * fix: correct apk CPE for bazel * fix: correct apk CPE for clang * fix: correct apk CPE for openjdk * fix: correct apk CPE for glibc * fix: correct apk CPE for gli * fix: correct apk CPE for bas * fix: correct apk CPE for alsa-lib * fix: correct apk CPE for alsa * fix: determine apk cpe vendor from known URLs * fix: add more url prefix->vendor mappings for apk * refactor: allow reuse of vendor by url prefix logic * feat: extract username as vendor candidate from github/gitlab Signed-off-by: Weston Steimel --- syft/pkg/cataloger/common/cpe/apk.go | 119 ++++++++++-------- syft/pkg/cataloger/common/cpe/apk_test.go | 76 +++++++++-- .../common/cpe/candidate_by_package_type.go | 71 +++++++++++ .../cataloger/common/cpe/vendors_from_url.go | 56 +++++++++ .../common/cpe/vendors_from_url_test.go | 61 +++++++++ 5 files changed, 326 insertions(+), 57 deletions(-) create mode 100644 syft/pkg/cataloger/common/cpe/vendors_from_url.go create mode 100644 syft/pkg/cataloger/common/cpe/vendors_from_url_test.go diff --git a/syft/pkg/cataloger/common/cpe/apk.go b/syft/pkg/cataloger/common/cpe/apk.go index 38e3a0b78..f91054478 100644 --- a/syft/pkg/cataloger/common/cpe/apk.go +++ b/syft/pkg/cataloger/common/cpe/apk.go @@ -13,21 +13,12 @@ var ( func pythonCandidateVendorsFromName(v string) fieldCandidateSet { vendors := newFieldCandidateSet() - vendors.add(fieldCandidate{ - value: v, - disallowSubSelections: true, - disallowDelimiterVariations: true, - }) - + vendors.addValue(v) vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.PythonPkg, v, v)...) vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.PythonPkg, v)...) 
for _, av := range additionalVendorsForPython(v) { - vendors.add(fieldCandidate{ - value: av, - disallowSubSelections: true, - disallowDelimiterVariations: true, - }) + vendors.addValue(av) vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.PythonPkg, av, av)...) vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.PythonPkg, av)...) } @@ -37,6 +28,7 @@ func pythonCandidateVendorsFromName(v string) fieldCandidateSet { func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { vendors := newFieldCandidateSet() + upstream := m.Upstream() for _, p := range pythonPrefixes { if strings.HasPrefix(m.Package, p) { @@ -44,8 +36,8 @@ func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { vendors.union(pythonCandidateVendorsFromName(t)) } - if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) { - t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p)) + if upstream != m.Package && strings.HasPrefix(upstream, p) { + t := strings.ToLower(strings.TrimPrefix(upstream, p)) vendors.union(pythonCandidateVendorsFromName(t)) } } @@ -55,12 +47,7 @@ func pythonCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { func pythonCandidateProductsFromName(p string) fieldCandidateSet { products := newFieldCandidateSet() - products.add(fieldCandidate{ - value: p, - disallowSubSelections: true, - disallowDelimiterVariations: true, - }) - + products.addValue(p) products.addValue(findAdditionalProducts(defaultCandidateAdditions, pkg.PythonPkg, p)...) products.removeByValue(findProductsToRemove(defaultCandidateRemovals, pkg.PythonPkg, p)...) 
return products @@ -68,6 +55,7 @@ func pythonCandidateProductsFromName(p string) fieldCandidateSet { func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { products := newFieldCandidateSet() + upstream := m.Upstream() for _, p := range pythonPrefixes { if strings.HasPrefix(m.Package, p) { @@ -75,8 +63,8 @@ func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { products.union(pythonCandidateProductsFromName(t)) } - if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) { - t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p)) + if upstream != m.Package && strings.HasPrefix(upstream, p) { + t := strings.ToLower(strings.TrimPrefix(upstream, p)) products.union(pythonCandidateProductsFromName(t)) } } @@ -86,12 +74,7 @@ func pythonCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { func rubyCandidateVendorsFromName(v string) fieldCandidateSet { vendors := newFieldCandidateSet() - vendors.add(fieldCandidate{ - value: v, - disallowSubSelections: true, - disallowDelimiterVariations: true, - }) - + vendors.addValue(v) vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.GemPkg, v, v)...) vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.GemPkg, v)...) 
return vendors @@ -99,16 +82,19 @@ func rubyCandidateVendorsFromName(v string) fieldCandidateSet { func rubyCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { vendors := newFieldCandidateSet() + upstream := m.Upstream() - for _, p := range rubyPrefixes { - if strings.HasPrefix(m.Package, p) { - t := strings.ToLower(strings.TrimPrefix(m.Package, p)) - vendors.union(rubyCandidateVendorsFromName(t)) - } + if upstream != "ruby" { + for _, p := range rubyPrefixes { + if strings.HasPrefix(m.Package, p) { + t := strings.ToLower(strings.TrimPrefix(m.Package, p)) + vendors.union(rubyCandidateVendorsFromName(t)) + } - if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) { - t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p)) - vendors.union(rubyCandidateVendorsFromName(t)) + if upstream != "" && upstream != m.Package && strings.HasPrefix(upstream, p) { + t := strings.ToLower(strings.TrimPrefix(upstream, p)) + vendors.union(rubyCandidateVendorsFromName(t)) + } } } @@ -117,12 +103,7 @@ func rubyCandidateVendorsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { func rubyCandidateProductsFromName(p string) fieldCandidateSet { products := newFieldCandidateSet() - products.add(fieldCandidate{ - value: p, - disallowSubSelections: true, - disallowDelimiterVariations: true, - }) - + products.addValue(p) products.addValue(findAdditionalProducts(defaultCandidateAdditions, pkg.GemPkg, p)...) products.removeByValue(findProductsToRemove(defaultCandidateRemovals, pkg.GemPkg, p)...) 
return products @@ -130,22 +111,49 @@ func rubyCandidateProductsFromName(p string) fieldCandidateSet { func rubyCandidateProductsFromAPK(m pkg.ApkMetadata) fieldCandidateSet { products := newFieldCandidateSet() + upstream := m.Upstream() - for _, p := range rubyPrefixes { - if strings.HasPrefix(m.Package, p) { - t := strings.ToLower(strings.TrimPrefix(m.Package, p)) - products.union(rubyCandidateProductsFromName(t)) - } + if upstream != "ruby" { + for _, p := range rubyPrefixes { + if strings.HasPrefix(m.Package, p) { + t := strings.ToLower(strings.TrimPrefix(m.Package, p)) + products.union(rubyCandidateProductsFromName(t)) + } - if m.OriginPackage != m.Package && strings.HasPrefix(m.OriginPackage, p) { - t := strings.ToLower(strings.TrimPrefix(m.OriginPackage, p)) - products.union(rubyCandidateProductsFromName(t)) + if upstream != "" && upstream != m.Package && strings.HasPrefix(upstream, p) { + t := strings.ToLower(strings.TrimPrefix(upstream, p)) + products.union(rubyCandidateProductsFromName(t)) + } } } return products } +func candidateVendorsFromAPKUpstream(m pkg.ApkMetadata) fieldCandidateSet { + vendors := newFieldCandidateSet() + upstream := m.Upstream() + if upstream != "" && upstream != m.Package { + vendors.addValue(upstream) + vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, pkg.ApkPkg, upstream, upstream)...) + vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, pkg.ApkPkg, upstream)...) + } + + return vendors +} + +func candidateProductsFromAPKUpstream(m pkg.ApkMetadata) fieldCandidateSet { + products := newFieldCandidateSet() + upstream := m.Upstream() + if upstream != "" { + products.addValue(upstream) + products.addValue(findAdditionalProducts(defaultCandidateAdditions, pkg.ApkPkg, upstream)...) + products.removeByValue(findProductsToRemove(defaultCandidateRemovals, pkg.ApkPkg, upstream)...) 
+ } + + return products +} + func candidateVendorsForAPK(p pkg.Package) fieldCandidateSet { metadata, ok := p.Metadata.(pkg.ApkMetadata) if !ok { @@ -155,6 +163,13 @@ func candidateVendorsForAPK(p pkg.Package) fieldCandidateSet { vendors := newFieldCandidateSet() vendors.union(pythonCandidateVendorsFromAPK(metadata)) vendors.union(rubyCandidateVendorsFromAPK(metadata)) + vendors.union(candidateVendorsFromAPKUpstream(metadata)) + vendors.union(candidateVendorsFromURL(metadata.URL)) + + for v := range vendors { + v.disallowDelimiterVariations = true + v.disallowSubSelections = true + } return vendors } @@ -168,6 +183,12 @@ func candidateProductsForAPK(p pkg.Package) fieldCandidateSet { products := newFieldCandidateSet() products.union(pythonCandidateProductsFromAPK(metadata)) products.union(rubyCandidateProductsFromAPK(metadata)) + products.union(candidateProductsFromAPKUpstream(metadata)) + + for p := range products { + p.disallowDelimiterVariations = true + p.disallowSubSelections = true + } return products } diff --git a/syft/pkg/cataloger/common/cpe/apk_test.go b/syft/pkg/cataloger/common/cpe/apk_test.go index 2335501a0..ed4efb1e0 100644 --- a/syft/pkg/cataloger/common/cpe/apk_test.go +++ b/syft/pkg/cataloger/common/cpe/apk_test.go @@ -24,13 +24,14 @@ func Test_candidateVendorsForAPK(t *testing.T) { expected: []string{"python-cryptography_project", "cryptography", "cryptographyproject", "cryptography_project"}, }, { - name: "py2-pypdf OriginPackage", + name: "py2-pypdf with explicit different origin", pkg: pkg.Package{ Metadata: pkg.ApkMetadata{ - OriginPackage: "py2-pypdf", + Package: "py2-pypdf", + OriginPackage: "abcdefg", }, }, - expected: []string{"pypdf", "pypdfproject", "pypdf_project"}, + expected: []string{"pypdf", "pypdfproject", "pypdf_project", "abcdefg"}, }, { name: "ruby-armadillo Package", @@ -41,10 +42,39 @@ func Test_candidateVendorsForAPK(t *testing.T) { }, expected: []string{"armadillo"}, }, + { + name: "python-3.6", + pkg: pkg.Package{ + 
Metadata: pkg.ApkMetadata{ + Package: "python-3.6", + }, + }, + expected: []string{"python", "python_software_foundation"}, + }, + { + name: "ruby-3.6", + pkg: pkg.Package{ + Metadata: pkg.ApkMetadata{ + Package: "ruby-3.6", + URL: "https://www.ruby-lang.org/", + }, + }, + expected: []string{"ruby", "ruby-lang"}, + }, + { + name: "make", + pkg: pkg.Package{ + Metadata: pkg.ApkMetadata{ + Package: "make", + URL: "https://www.gnu.org/software/make", + }, + }, + expected: []string{"gnu"}, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - assert.ElementsMatch(t, test.expected, candidateVendorsForAPK(test.pkg).values(), "different vendors") + assert.ElementsMatch(t, test.expected, candidateVendorsForAPK(test.pkg).uniqueValues(), "different vendors") }) } } @@ -65,13 +95,14 @@ func Test_candidateProductsForAPK(t *testing.T) { expected: []string{"cryptography", "python-cryptography"}, }, { - name: "py2-pypdf OriginPackage", + name: "py2-pypdf with explicit different origin", pkg: pkg.Package{ Metadata: pkg.ApkMetadata{ - OriginPackage: "py2-pypdf", + Package: "py2-pypdf", + OriginPackage: "abcdefg", }, }, - expected: []string{"pypdf"}, + expected: []string{"pypdf", "abcdefg"}, }, { name: "ruby-armadillo Package", @@ -82,10 +113,39 @@ func Test_candidateProductsForAPK(t *testing.T) { }, expected: []string{"armadillo"}, }, + { + name: "python-3.6", + pkg: pkg.Package{ + Metadata: pkg.ApkMetadata{ + Package: "python-3.6", + }, + }, + expected: []string{"python"}, + }, + { + name: "ruby-3.6", + pkg: pkg.Package{ + Metadata: pkg.ApkMetadata{ + Package: "ruby-3.6", + URL: "https://www.ruby-lang.org/", + }, + }, + expected: []string{"ruby"}, + }, + { + name: "make", + pkg: pkg.Package{ + Metadata: pkg.ApkMetadata{ + Package: "make", + URL: "https://www.gnu.org/software/make", + }, + }, + expected: []string{"make"}, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - assert.ElementsMatch(t, test.expected, 
candidateProductsForAPK(test.pkg).values(), "different products") + assert.ElementsMatch(t, test.expected, candidateProductsForAPK(test.pkg).uniqueValues(), "different products") }) } } diff --git a/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go b/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go index 488349198..7a5db9cce 100644 --- a/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go +++ b/syft/pkg/cataloger/common/cpe/candidate_by_package_type.go @@ -203,6 +203,77 @@ var defaultCandidateAdditions = buildCandidateLookup( candidateKey{PkgName: "nodejs-current"}, candidateAddition{AdditionalProducts: []string{"node.js"}}, }, + { + pkg.ApkPkg, + candidateKey{PkgName: "go"}, + candidateAddition{AdditionalVendors: []string{"golang"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "ruby"}, + candidateAddition{AdditionalVendors: []string{"ruby-lang"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "bazel"}, + candidateAddition{AdditionalVendors: []string{"google"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "clang"}, + candidateAddition{AdditionalVendors: []string{"llvm"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "openjdk"}, + candidateAddition{AdditionalVendors: []string{"oracle"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "glibc"}, + candidateAddition{AdditionalVendors: []string{"gnu"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "glib"}, + candidateAddition{AdditionalVendors: []string{"gnome"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "bash"}, + candidateAddition{AdditionalVendors: []string{"gnu"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "alsa-lib"}, + candidateAddition{AdditionalVendors: []string{"alsa-project"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "alsa"}, + candidateAddition{AdditionalVendors: []string{"alsa-project"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "make"}, + candidateAddition{AdditionalVendors: []string{"gnu"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "git"}, 
+ candidateAddition{AdditionalVendors: []string{"git-scm"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "bind"}, + candidateAddition{AdditionalVendors: []string{"isc"}}, + }, + { + pkg.ApkPkg, + candidateKey{PkgName: "libxpm"}, + candidateAddition{AdditionalVendors: []string{"libxpm_project"}}, + }, + // // Binary packages { pkg.BinaryPkg, diff --git a/syft/pkg/cataloger/common/cpe/vendors_from_url.go b/syft/pkg/cataloger/common/cpe/vendors_from_url.go new file mode 100644 index 000000000..932678274 --- /dev/null +++ b/syft/pkg/cataloger/common/cpe/vendors_from_url.go @@ -0,0 +1,56 @@ +package cpe + +import ( + "regexp" + "strings" + + "github.com/anchore/syft/internal" +) + +var ( + urlPrefixToVendors = map[string][]string{ + "https://www.gnu.org/": {"gnu"}, + "https://developer.gnome.org/": {"gnome"}, + "https://www.ruby-lang.org/": {"ruby-lang"}, + "https://llvm.org/": {"llvm"}, + "https://www.isc.org/": {"isc"}, + } + + vendorExtractionPatterns = []*regexp.Regexp{ + regexp.MustCompile(`^https://(?:github|gitlab)\.com/(?P<vendor>[\w\-]*?)/.*$`), + } +) + +func candidateVendorsFromURL(url string) fieldCandidateSet { + vendors := newFieldCandidateSet() + + for urlPrefix, additionalVendors := range urlPrefixToVendors { + if strings.HasPrefix(url, urlPrefix) { + for _, v := range additionalVendors { + vendors.add(fieldCandidate{ + value: v, + disallowSubSelections: true, + disallowDelimiterVariations: true, + }) + + return vendors + } + } + } + + for _, p := range vendorExtractionPatterns { + groups := internal.MatchNamedCaptureGroups(p, url) + + if v, ok := groups["vendor"]; ok { + vendors.add(fieldCandidate{ + value: v, + disallowSubSelections: true, + disallowDelimiterVariations: true, + }) + + return vendors + } + } + + return vendors +} diff --git a/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go b/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go new file mode 100644 index 000000000..19348bd4a --- /dev/null +++ 
b/syft/pkg/cataloger/common/cpe/vendors_from_url_test.go @@ -0,0 +1,61 @@ +package cpe + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_candidateVendorsFromURL(t *testing.T) { + tests := []struct { + name string + url string + expected []string + }{ + { + name: "empty", + url: "", + expected: []string{}, + }, + { + name: "no known vendors", + url: "https://something-unknown.com/126374623876/12345", + expected: []string{}, + }, + { + name: "gnu vendor from url", + url: "https://www.gnu.org/software/make", + expected: []string{"gnu"}, + }, + { + name: "github username as vendor", + url: "https://github.com/armadillo/abcxyz-12345", + expected: []string{"armadillo"}, + }, + { + name: "github username with - as vendor", + url: "https://github.com/1234-abc-xyz/hello", + expected: []string{"1234-abc-xyz"}, + }, + { + name: "gitlab username as vendor", + url: "https://gitlab.com/armadillo/abcxyz-12345", + expected: []string{"armadillo"}, + }, + { + name: "gitlab username with - as vendor", + url: "https://gitlab.com/1234-abc-xyz/hello", + expected: []string{"1234-abc-xyz"}, + }, + { + name: "github username as vendor from longer url", + url: "https://github.com/armadillo/abcxyz-12345/a/b/c/d/e/f/g", + expected: []string{"armadillo"}, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.ElementsMatch(t, test.expected, candidateVendorsFromURL(test.url).uniqueValues(), "different vendors") + }) + } +}