syft/syft/cpe/cpe.go
William Murphy b7a6d5e946
feat: Record where CPEs come from (#2552)
Syft can get CPEs from several sources, including generating them based on
package data, finding them in the NVD CPE dictionary, or finding them declared
in a manifest or existing SBOM. Record where Syft got CPEs so that consumers of
SBOMs can reason about how trustworthy they are.

Signed-off-by: Will Murphy <will.murphy@anchore.com>
2024-02-02 16:17:52 +00:00

226 lines
7.0 KiB
Go

package cpe
import (
"fmt"
"regexp"
"strings"
"github.com/facebookincubator/nvdtools/wfn"
)
// CPE pairs the parsed attributes of a CPE name with a Source describing
// where Syft got it, e.g. generated by heuristics vs looked up in the
// NVD CPE dictionary.
type CPE struct {
// Attributes holds the individual fields of the CPE.
Attributes Attributes
// Source records where this CPE came from.
Source Source
}
// Source names the origin of a CPE (see the *Source constants in this package).
type Source string

// String returns the source as a plain string, satisfying fmt.Stringer.
func (s Source) String() string {
	return string(s)
}
// Known origins of a CPE.
const (
// GeneratedSource marks CPEs that Syft generated itself (e.g. by heuristics based on package data).
GeneratedSource Source = "syft-generated"
// NVDDictionaryLookupSource marks CPEs that were looked up in the NVD CPE dictionary.
NVDDictionaryLookupSource Source = "nvd-cpe-dictionary"
// DeclaredSource marks CPEs that were declared, e.g. in a manifest or an existing SBOM.
DeclaredSource Source = "declared"
)
// Any is the attribute value meaning "matches anything"; it is the empty string.
// normalizeField maps a parsed "*" wildcard to this value.
const Any = ""
// Attributes holds the individual fields of a CPE.
//
// The type is converted directly to and from wfn.Attributes (see asAttributes
// and fromAttributes), so its fields must keep the same names, types and order
// as that type for the conversions to compile.
type Attributes struct {
Part string
Vendor string
Product string
Version string
Update string
Edition string
SWEdition string
TargetSW string
TargetHW string
Other string
Language string
}
// asAttributes converts the receiver into the equivalent nvdtools wfn.Attributes value.
func (c Attributes) asAttributes() wfn.Attributes {
	converted := wfn.Attributes(c)
	return converted
}
// fromAttributes converts an nvdtools wfn.Attributes value into this package's Attributes type.
func fromAttributes(a wfn.Attributes) Attributes {
	converted := Attributes(a)
	return converted
}
// BindToFmtString returns the result of wfn.Attributes.BindToFmtString for
// these attributes. The field values are passed through as-is; use String
// when the fields still need to be escaped first.
func (c Attributes) BindToFmtString() string {
	wfnAttrs := c.asAttributes()
	return wfnAttrs.BindToFmtString()
}
// NewWithAny returns the Attributes produced by wfn.NewAttributesWithAny,
// converted to this package's Attributes type.
func NewWithAny() Attributes {
	wildcard := wfn.NewAttributesWithAny()
	return fromAttributes(*wildcard)
}
const (
// allowedCPEPunctuation is the set of punctuation characters that sanitize
// prefixes with a backslash and that stripSlashes un-escapes again.
allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~"
)
// cpeRegexString is taken from
// https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official CPE spec.
// The first alternative matches CPE URIs (cpe:/...) and the second matches
// formatted binding strings (cpe:2.3:...).
//
// NOTE(review): alternation has the lowest precedence in the regexp syntax, so as
// written the leading `^` applies only to the URI alternative and the trailing `$`
// only to the binding-string alternative — confirm whether partial matches
// (e.g. trailing text after a URI) are meant to be accepted.
const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) +
// Or match the CPE binding string
// Note that we had to replace '`' with '\x60' to escape the backticks
`|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`)
// cpeRegex is cpeRegexString compiled once at package initialization; ValidateString matches against it.
var cpeRegex = regexp.MustCompile(cpeRegexString)
// New parses value with NewAttributes and tags the result with the given source.
// On a parse error the zero CPE is returned together with that error.
func New(value string, source Source) (CPE, error) {
	var result CPE

	attrs, err := NewAttributes(value)
	if err != nil {
		return result, err
	}

	result.Attributes = attrs
	result.Source = source
	return result, nil
}
// NewAttributes parses a CPE string and returns the corresponding Attributes.
// Some input, such as the presence of whitespace characters, is tolerated: the
// fields are normalized while parsing, and a stricter validation is then run
// against the sanitized string form of the result.
//
// An error is returned when the string cannot be parsed, or when the sanitized
// form of the parsed attributes does not pass ValidateString. In both cases the
// returned Attributes is the zero value.
func NewAttributes(cpeStr string) (Attributes, error) {
	// parse without validating yet, since it may be possible to escape select cases on the caller's behalf
	c, err := newWithoutValidation(cpeStr)
	if err != nil {
		return Attributes{}, fmt.Errorf("unable to parse Attributes string: %w", err)
	}

	// ensure that these attributes can be validated after being fully sanitized.
	// The validation error needs its own variable: the parse error above is nil
	// at this point, and returning it would hand the caller an empty Attributes
	// with no error at all.
	if validationErr := ValidateString(c.String()); validationErr != nil {
		return Attributes{}, validationErr
	}

	// we don't return the sanitized string, as this is a concern for later when creating CPE strings. In fact, since
	// sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as is.
	return c, nil
}
// Must builds a CPE from cpeStr tagged with source, panicking (via
// MustAttributes) when the string cannot be turned into Attributes.
func Must(cpeStr string, source Source) CPE {
	return CPE{
		Attributes: MustAttributes(cpeStr),
		Source:     source,
	}
}
// MustAttributes returns the Attributes parsed from cpeStr by NewAttributes
// and panics with the returned error if NewAttributes fails.
func MustAttributes(cpeStr string) Attributes {
	attrs, err := NewAttributes(cpeStr)
	if err == nil {
		return attrs
	}
	panic(err)
}
// ValidateString reports whether cpeStr matches the official CPE regex
// (cpeRegex). It returns nil on a match and a descriptive error otherwise.
//
// This filter exists because the facebook nvdtools parser can sometimes
// successfully parse CPE strings that are not actually valid.
func ValidateString(cpeStr string) error {
	if cpeRegex.MatchString(cpeStr) {
		return nil
	}
	return fmt.Errorf("failed to parse Attributes=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString)
}
// newWithoutValidation parses cpeStr with wfn.Parse and normalizes every field
// of the result via normalizeField. No regex validation is performed here.
// An error is returned when wfn.Parse fails or yields a nil result.
func newWithoutValidation(cpeStr string) (Attributes, error) {
	parsed, err := wfn.Parse(cpeStr)
	switch {
	case err != nil:
		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q: %w", cpeStr, err)
	case parsed == nil:
		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q", cpeStr)
	}

	attrs := fromAttributes(*parsed)

	// we need to compare the raw data since we are constructing CPEs in other
	// locations, so every field is normalized in place
	for _, field := range []*string{
		&attrs.Vendor,
		&attrs.Product,
		&attrs.Language,
		&attrs.Version,
		&attrs.TargetSW,
		&attrs.Part,
		&attrs.Edition,
		&attrs.Other,
		&attrs.SWEdition,
		&attrs.TargetHW,
		&attrs.Update,
	} {
		*field = normalizeField(*field)
	}

	return attrs, nil
}
// normalizeField converts a single parsed CPE field into its raw form:
// spaces become underscores (per section 5.3.2 of the CPE spec v2.3), a lone
// "*" becomes Any, and any other value has its escaping backslashes removed
// by stripSlashes.
func normalizeField(field string) string {
	underscored := strings.ReplaceAll(field, " ", "_")

	switch underscored {
	case "*":
		return Any
	default:
		return stripSlashes(underscored)
	}
}
// stripSlashes is the reverse of the sanitize function: it drops every
// backslash that is immediately followed by one of the characters in
// allowedCPEPunctuation and keeps everything else untouched. This allows
// CPEs with quoted characters to round-trip through parsing correctly.
func stripSlashes(s string) string {
	var out strings.Builder
	for pos, r := range s {
		// a backslash is an escape only when the very next byte is allowed punctuation
		isEscape := r == '\\' &&
			pos+1 < len(s) &&
			strings.ContainsRune(allowedCPEPunctuation, rune(s[pos+1]))
		if !isEscape {
			out.WriteRune(r)
		}
	}
	return out.String()
}
// String renders the attributes as a formatted binding string. Every field is
// passed through sanitize first, so the receiver's raw values are left untouched.
func (c Attributes) String() string {
	escaped := Attributes{
		Part:      sanitize(c.Part),
		Vendor:    sanitize(c.Vendor),
		Product:   sanitize(c.Product),
		Version:   sanitize(c.Version),
		Update:    sanitize(c.Update),
		Edition:   sanitize(c.Edition),
		SWEdition: sanitize(c.SWEdition),
		TargetSW:  sanitize(c.TargetSW),
		TargetHW:  sanitize(c.TargetHW),
		Other:     sanitize(c.Other),
		Language:  sanitize(c.Language),
	}
	return escaped.BindToFmtString()
}
// sanitize is a modified version of the WFNize function from nvdtools: it
// replaces spaces with underscores and quotes every allowed punctuation
// character with a backslash (per section 5.3.2 of the CPE 2.3 spec).
//
// It deliberately differs from the upstream implementation, whose
// "addSlashesAt" step stops looping as soon as it encounters ":". A colon is
// a valid character for a WFN attribute once quoted, but the upstream handling
// truncates strings that contain it: "prefix:1.2" should be quoted as
// "prefix\:1.2" yet ends up as "prefix", losing information and producing
// incorrect CPEs.
func sanitize(s string) string {
	var escaped strings.Builder
	for _, r := range s {
		switch {
		case r == ' ':
			// spaces are replaced, not escaped
			escaped.WriteByte('_')
		case strings.ContainsRune(allowedCPEPunctuation, r):
			escaped.WriteByte('\\')
			escaped.WriteRune(r)
		default:
			escaped.WriteRune(r)
		}
	}
	return escaped.String()
}