William Murphy 878df69330
chore: stop re-exporting wfn.Attributes (#2534)
* chore: stop re-exporting wfn.Attributes

Previously, Syft re-exported wfn.Attributes from the nvdtools package as
a member of the Package struct. However, Syft doesn't own this struct,
and so after Syft 1.0, might be forced to bump a semver major version
due to a breaking change in wfn.Attributes. Rather than incur this risk
going into 1.0, instead replace Syft's use of wfn.Attributes with Syft's
own cpe.CPE type. That type has some pass-through calls to
wfn.Attributes, but hides the dependency from the rest of the
application.

Signed-off-by: Will Murphy <will.murphy@anchore.com>

* chore: make cpe.CPE type a Stringer

Previously, the cpe.CPE type was an alias for wfn.Attributes from
nvdtools. Now that it is a type we control, make the String method take
the CPE as a receiver, rather than as a normal parameter, so that Syft's
cpe.CPE type implements Stringer.

Signed-off-by: Will Murphy <will.murphy@anchore.com>

---------

Signed-off-by: Will Murphy <will.murphy@anchore.com>
2024-01-24 08:59:03 -05:00

351 lines
9.9 KiB
Go

package cpe
import (
"bufio"
"bytes"
_ "embed"
"encoding/json"
"fmt"
"sort"
"strings"
"sync"
"github.com/facebookincubator/nvdtools/wfn"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
)
// knownVendors contains vendor strings that are known to exist in the CPE database. When candidateVendors
// finds one of these among its candidates, it returns that vendor alone instead of the full candidate list.
var knownVendors = strset.New("apache")
// newCPE assembles an application ("a" part) CPE from the given product, vendor, version, and target
// software values, starting from the value returned by cpe.NewWithAny. It returns nil when the string
// form of the assembled CPE is rejected by cpe.ValidateString.
func newCPE(product, vendor, version, targetSW string) *cpe.CPE {
	candidate := cpe.NewWithAny()
	candidate.Part = "a"
	candidate.Product, candidate.Vendor = product, vendor
	candidate.Version, candidate.TargetSW = version, targetSW

	if err := cpe.ValidateString(candidate.String()); err != nil {
		return nil
	}

	return &candidate
}
// indexedCPEDictionaryData holds the raw bytes of the embedded dictionary/data/cpe-index.json file.
//
//go:embed dictionary/data/cpe-index.json
var indexedCPEDictionaryData []byte

// indexedCPEDictionary is the decoded form of indexedCPEDictionaryData; it is populated by
// GetIndexedDictionary.
var indexedCPEDictionary *dictionary.Indexed

// indexedCPEDictionaryOnce ensures GetIndexedDictionary decodes the embedded data at most once.
var indexedCPEDictionaryOnce sync.Once
// GetIndexedDictionary returns the CPE dictionary index decoded from the embedded JSON data. The data is
// decoded on the first call only; later calls return the cached result.
//
// A non-nil error is returned when decoding fails (the decode error itself on the call that performed
// the decode, a generic error on every later call). The returned index is nil whenever the error is
// non-nil.
func GetIndexedDictionary() (_ *dictionary.Indexed, err error) {
	indexedCPEDictionaryOnce.Do(func() {
		err = json.Unmarshal(indexedCPEDictionaryData, &indexedCPEDictionary)
		if err != nil {
			// json.Unmarshal can allocate and partially fill its target before reporting a type
			// error. Since the Once never runs again, discard the partial value so that later
			// calls fail on the nil check below instead of returning an incomplete index with a
			// nil error.
			indexedCPEDictionary = nil
		}
	})

	if err != nil {
		return nil, err
	}

	if indexedCPEDictionary == nil {
		// either the embedded document decoded to nothing (e.g. JSON null) or a previous call
		// already failed to decode it
		return nil, fmt.Errorf("failed to unmarshal indexed CPE dictionary")
	}

	return indexedCPEDictionary, nil
}
// DictionaryFind looks up the package name in the indexed CPE dictionary under the ecosystem that
// corresponds to the package type (npm, RubyGems, PyPI, Jenkins plugins, or Rust crates). When an entry
// exists and parses as a CPE, that CPE is returned with its Version replaced by the package version.
//
// The boolean is false (and the CPE is the zero value) when the dictionary is unavailable, the package
// type is not covered by the dictionary, no entry exists for the name, or the entry cannot be parsed.
func DictionaryFind(p pkg.Package) (cpe.CPE, bool) {
	index, err := GetIndexedDictionary()
	if err != nil {
		log.Debugf("dictionary CPE lookup not available: %+v", err)
		return cpe.CPE{}, false
	}

	var (
		entry string
		found bool
	)

	// package types without a case here are not supported by the dictionary yet; they leave found=false
	switch p.Type {
	case pkg.NpmPkg:
		entry, found = index.EcosystemPackages[dictionary.EcosystemNPM][p.Name]
	case pkg.GemPkg:
		entry, found = index.EcosystemPackages[dictionary.EcosystemRubyGems][p.Name]
	case pkg.PythonPkg:
		entry, found = index.EcosystemPackages[dictionary.EcosystemPyPI][p.Name]
	case pkg.JenkinsPluginPkg:
		entry, found = index.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][p.Name]
	case pkg.RustPkg:
		entry, found = index.EcosystemPackages[dictionary.EcosystemRustCrates][p.Name]
	}

	if !found {
		// unsupported package type, or the dictionary has no CPE for this package
		return cpe.CPE{}, false
	}

	result, err := cpe.New(entry)
	if err != nil {
		return cpe.CPE{}, false
	}

	// the dictionary entry identifies the vendor/product; the version comes from the package itself
	result.Version = p.Version

	return result, true
}
// Generate creates a list of candidate CPEs for the given package by pairing every product candidate
// with every vendor candidate. The aim is a minimal set of representative CPEs, so optional fields (such
// as target SW) are left as wfn.Any. Combinations rejected by newCPE are dropped, the package-level
// cpeFilters are applied, and the result is sorted with cpe.BySpecificity. A nil slice is returned when
// the package yields no product candidates.
func Generate(p pkg.Package) []cpe.CPE {
	vendors := candidateVendors(p)
	products := candidateProducts(p)
	if len(products) == 0 {
		return nil
	}

	seen := strset.New()
	result := make([]cpe.CPE, 0)
	for _, product := range products {
		for _, vendor := range vendors {
			// only consider each product/vendor/version combination once
			combo := product + "|" + vendor + "|" + p.Version
			if !seen.Has(combo) {
				seen.Add(combo)

				if generated := newCPE(product, vendor, p.Version, wfn.Any); generated != nil {
					result = append(result, *generated)
				}
			}
		}
	}

	// drop any known combinations that don't accurately represent this package
	result = filter(result, p, cpeFilters...)

	sort.Sort(cpe.BySpecificity(result))

	return result
}
// candidateVendors gathers the vendor strings to try when generating CPEs for the given package. It
// starts from the product candidates, adjusts them by language and by metadata type, expands them with
// delimiter variations and sub-selections, applies the default candidate additions and removals, and
// finally prefers a single known vendor (see knownVendors) when one is present.
func candidateVendors(p pkg.Package) []string {
	// Where packaging metadata has no clear vendor field (or one that could be read as such), the
	// project name tends to be a common stand-in. The elasticsearch gem, xstream jar, and rack gem are
	// examples: vulnerabilities exist with CPEs whose vendor is the product name and does not appear to
	// be derived from any available package metadata. So the product candidates seed the vendor set.
	candidates := newFieldCandidateSet(candidateProducts(p)...)

	switch p.Language {
	case pkg.JavaScript:
		// a JavaScript package named node.js has the vendor "nodejs"
		if p.Name == "node.js" {
			candidates.addValue("nodejs")
		}
	case pkg.Ruby:
		candidates.addValue("ruby-lang")
	case pkg.Go:
		// for Go, discard everything gathered so far and rely solely on the golang-specific helper
		candidates.clear()
		if goVendor := candidateVendorForGo(p.Name); goVendor != "" {
			candidates.addValue(goVendor)
		}
	}

	// merge in vendor candidates derived from ecosystem-specific metadata
	switch p.Metadata.(type) {
	case pkg.RpmDBEntry:
		candidates.union(candidateVendorsForRPM(p))
	case pkg.RubyGemspec:
		candidates.union(candidateVendorsForRuby(p))
	case pkg.PythonPackage:
		candidates.union(candidateVendorsForPython(p))
	case pkg.JavaArchive:
		candidates.union(candidateVendorsForJava(p))
	case pkg.ApkDBEntry:
		candidates.union(candidateVendorsForAPK(p))
	case pkg.NpmPackage:
		candidates.union(candidateVendorsForJavascript(p))
	}

	// vendor candidates of "" and "*" must not be generated (since CPEs will match any other value)
	candidates.removeByValue("")
	candidates.removeByValue("*")

	// add variations that swap hyphens for underscores and vice versa
	addDelimiterVariations(candidates)

	// add sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci])
	addAllSubSelections(candidates)

	// add more candidates based on the package info for each vendor candidate gathered so far
	for _, current := range candidates.uniqueValues() {
		candidates.addValue(findAdditionalVendors(defaultCandidateAdditions, p.Type, p.Name, current)...)
	}

	// remove the vendors that the default candidate removals list for this package type and name
	candidates.removeByValue(findVendorsToRemove(defaultCandidateRemovals, p.Type, p.Name)...)

	remaining := candidates.uniqueValues()

	// a known vendor, when present, wins over every other candidate
	for _, v := range remaining {
		if knownVendors.Has(v) {
			return []string{v}
		}
	}

	return remaining
}
// candidateProducts gathers the product strings to try when generating CPEs for the given package. It
// starts from the package name, adjusts the set by language and metadata type, adds delimiter
// variations, and applies the default candidate additions and removals.
func candidateProducts(p pkg.Package) []string {
	candidates := newFieldCandidateSet(p.Name)

	_, isJavaArchive := p.Metadata.(pkg.JavaArchive)

	switch {
	case p.Language == pkg.Python:
		// also try the "python-" prefixed form, unless the name already starts with "python"
		if !strings.HasPrefix(p.Name, "python") {
			candidates.addValue("python-" + p.Name)
		}
	case p.Language == pkg.Java || isJavaArchive:
		candidates.addValue(candidateProductsForJava(p)...)
	case p.Language == pkg.Go:
		// for Go, discard everything gathered so far and rely solely on the golang-specific helper
		candidates.clear()
		if goProduct := candidateProductForGo(p.Name); goProduct != "" {
			candidates.addValue(goProduct)
		}
	}

	if _, isAPK := p.Metadata.(pkg.ApkDBEntry); isAPK {
		candidates.union(candidateProductsForAPK(p))
	}

	// candidates of "" and "*" are never acceptable (since CPEs will match any other value)
	candidates.removeByValue("")
	candidates.removeByValue("*")

	// add variations that swap hyphens for underscores and vice versa
	addDelimiterVariations(candidates)

	// apply the known candidate additions, then the known candidate removals
	candidates.addValue(findAdditionalProducts(defaultCandidateAdditions, p.Type, p.Name)...)
	candidates.removeByValue(findProductsToRemove(defaultCandidateRemovals, p.Type, p.Name)...)

	return candidates.uniqueValues()
}
// addAllSubSelections adds to fields every sub-selection (see generateSubSelections) of each value
// currently in the set, skipping the candidates matched by subSelectionsDisallowed. The source values
// are taken from a copy, so values added along the way are not themselves expanded.
func addAllSubSelections(fields fieldCandidateSet) {
	eligible := fields.copy()
	eligible.removeWhere(subSelectionsDisallowed)

	for _, value := range eligible.values() {
		subSelections := generateSubSelections(value)
		fields.addValue(subSelections...)
	}
}
// generateSubSelections splits a field on hyphens and underscores and returns the cumulative prefixes
// ending at each non-empty segment, shortest first, for use as product or vendor candidates.
// E.g. jenkins-ci-tools -> [jenkins, jenkins-ci, jenkins-ci-tools]. Leading and trailing separators are
// dropped, and a run of separators is collapsed to the last separator in the run.
func generateSubSelections(field string) (results []string) {
	// separator holds the last byte of the previous token, which joins the next segment to the prefix
	var separator uint8

	tokens := bufio.NewScanner(strings.NewReader(field))
	tokens.Split(scanByHyphenOrUnderscore)

	for tokens.Scan() {
		raw := tokens.Text()
		if raw == "" {
			break
		}

		// scanByHyphenOrUnderscore keeps the delimiter on each token, so strip any hyphens or
		// underscores from both ends; a token made only of delimiters contributes no segment
		if segment := strings.TrimFunc(raw, trimHyphenOrUnderscore); segment != "" {
			next := segment
			if n := len(results); n > 0 {
				// extend the previous (longest so far) prefix with this segment
				next = results[n-1] + string(separator) + segment
			}
			results = append(results, next)
		}

		// remember the trailing byte of this token for the next iteration
		separator = raw[len(raw)-1]
	}

	return results
}
// trimHyphenOrUnderscore reports whether r is a hyphen or an underscore. It is meant to be used as the
// character filter for strings.TrimFunc in order to strip those characters from the ends of a string.
func trimHyphenOrUnderscore(r rune) bool {
	return r == '-' || r == '_'
}
// scanByHyphenOrUnderscore is a split function for bufio.Scanner that ends each token at the first
// hyphen or underscore and keeps that separator as the token's last byte. Without a separator in data,
// it asks for more input until atEOF, at which point any remaining bytes form the final token.
func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byte, err error) {
	sep := bytes.IndexFunc(data, func(r rune) bool { return r == '-' || r == '_' })

	switch {
	case sep >= 0:
		// the token runs through (and includes) the separator
		return sep + 1, data[:sep+1], nil
	case atEOF && len(data) > 0:
		// no separator left: whatever remains is the last token
		return len(data), data, nil
	default:
		// nothing to emit yet (need more data) or nothing left at all
		return 0, nil, nil
	}
}
// addDelimiterVariations adds, for each candidate in fields not matched by delimiterVariationsDisallowed,
// a copy whose value has every hyphen replaced by an underscore (when the value contains a hyphen) and a
// copy whose value has every underscore replaced by a hyphen (when the value contains an underscore).
// The source candidates are taken from a copy of the set, so added variations are not expanded again.
func addDelimiterVariations(fields fieldCandidateSet) {
	eligible := fields.copy()
	eligible.removeWhere(delimiterVariationsDisallowed)

	// each pair is (delimiter to look for, delimiter to substitute); hyphen -> underscore goes first
	swaps := [2][2]string{{"-", "_"}, {"_", "-"}}

	for _, original := range eligible.list() {
		value := original.value

		for _, swap := range swaps {
			from, to := swap[0], swap[1]
			if !strings.Contains(value, from) {
				continue
			}

			// keep every other attribute of the candidate and change only its value
			variant := original
			variant.value = strings.ReplaceAll(value, from, to)
			fields.add(variant)
		}
	}
}