syft/syft/cpe/cpe.go

package cpe

import (
	"fmt"
	"regexp"
	"strings"

	"github.com/facebookincubator/nvdtools/wfn"
)

// CPE pairs the attributes of a CPE (Common Platform Enumeration) name with a
// Source describing where Syft got those attributes, e.g. generated by
// heuristics vs looked up in the NVD CPE dictionary.
type CPE struct {
	Attributes Attributes
	Source     Source
}

// Source names where the attributes of a CPE came from.
type Source string

// String returns the source as a plain string.
func (s Source) String() string {
	return string(s)
}

// Known Source values:
//   - GeneratedSource: the attributes were generated by Syft itself.
//   - NVDDictionaryLookupSource: the attributes were looked up in the NVD CPE dictionary.
//   - DeclaredSource: the attributes are tagged "declared"; presumably they were
//     stated explicitly by the scanned artifact or the user (confirm against callers).
const (
	GeneratedSource           Source = "syft-generated"
	NVDDictionaryLookupSource Source = "nvd-cpe-dictionary"
	DeclaredSource            Source = "declared"
)

// Any is the empty string; normalizeField stores it in place of a field whose
// parsed value is the "*" wildcard.
const Any = ""

// Attributes holds the individual attribute values of a CPE name as plain
// strings. Its field set matches wfn.Attributes, which is what allows
// asAttributes and fromAttributes to move between the two types with a direct
// type conversion.
type Attributes struct {
	Part      string
	Vendor    string
	Product   string
	Version   string
	Update    string
	Edition   string
	SWEdition string
	TargetSW  string
	TargetHW  string
	Other     string
	Language  string
}

// asAttributes converts c into the equivalent nvdtools wfn.Attributes value.
func (c Attributes) asAttributes() wfn.Attributes {
	converted := wfn.Attributes(c)
	return converted
}

// fromAttributes converts an nvdtools wfn.Attributes value into the
// package-local Attributes type.
func fromAttributes(a wfn.Attributes) Attributes {
	converted := Attributes(a)
	return converted
}

// BindToFmtString returns the result of wfn's BindToFmtString for these
// attributes. The field values are passed through as they are; use String to
// have them escaped first.
func (c Attributes) BindToFmtString() string {
	wfnAttrs := c.asAttributes()
	return wfnAttrs.BindToFmtString()
}

// NewWithAny returns the Attributes equivalent of the value produced by
// wfn.NewAttributesWithAny.
func NewWithAny() Attributes {
	anyAttrs := wfn.NewAttributesWithAny()
	return fromAttributes(*anyAttrs)
}

const (
	// allowedCPEPunctuation lists the punctuation characters that sanitize
	// prefixes with a backslash and that stripSlashes un-escapes again.
	// Backslash, '*', '?' and '_' are not part of this set.
	allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~"
)

// cpeRegexString is taken from
// https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd which has the official CPE spec.
// The first alternative matches CPE URI bindings ("cpe:/...") and the second alternative matches
// CPE 2.3 formatted string bindings ("cpe:2.3:...").
// NOTE(review): the pattern has the shape `^(A)|(B)$`; because alternation binds more loosely than the
// anchors, `^` applies only to the URI alternative and `$` only to the formatted-string alternative.
// Confirm whether a full-string match of both alternatives was intended.
const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) +
	// Or match the CPE formatted string binding
	// Note that we had to replace '`' with '\x60' to escape the backticks
	`|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`)

// cpeRegex is cpeRegexString compiled once as a package-level variable;
// regexp.MustCompile panics if the pattern does not compile.
var cpeRegex = regexp.MustCompile(cpeRegexString)

// New parses value with NewAttributes and pairs the resulting attributes with
// the given source. If parsing reports an error, the zero CPE is returned
// together with that error.
func New(value string, source Source) (CPE, error) {
	var result CPE

	parsed, err := NewAttributes(value)
	if err != nil {
		return result, err
	}

	result.Attributes = parsed
	result.Source = source
	return result, nil
}

// NewAttributes parses a formatted CPE string and returns the resulting Attributes. Some input, such as the presence
// of whitespace characters, is tolerated while parsing; a stricter validation is then run against the sanitized string
// form of the parsed attributes.
//
// An error is returned when the string cannot be parsed, or when the sanitized form of the parsed attributes does not
// pass ValidateString. In both cases the returned Attributes is the zero value.
func NewAttributes(cpeStr string) (Attributes, error) {
	// parse without validating yet, since it may be possible to escape select cases on the caller's behalf
	c, err := newWithoutValidation(cpeStr)
	if err != nil {
		return Attributes{}, fmt.Errorf("unable to parse Attributes string: %w", err)
	}

	// ensure that the attributes can be validated after being fully sanitized. The validation error itself must be
	// returned here: the parse error above is nil at this point, so returning it would report success alongside
	// an empty Attributes value.
	if err := ValidateString(c.String()); err != nil {
		return Attributes{}, err
	}

	// we don't return the sanitized string, as this is a concern for later when creating CPE strings. In fact, since
	// sanitization is lossy (whitespace is replaced, not escaped) it's important that the raw values are left as is.
	return c, nil
}

// Must builds a CPE from cpeStr and source. It panics (via MustAttributes)
// when cpeStr is rejected by NewAttributes.
func Must(cpeStr string, source Source) CPE {
	return CPE{
		Attributes: MustAttributes(cpeStr),
		Source:     source,
	}
}

// MustAttributes returns the Attributes parsed from cpeStr by NewAttributes
// and panics with the reported error if NewAttributes fails.
func MustAttributes(cpeStr string) Attributes {
	attrs, parseErr := NewAttributes(cpeStr)
	if parseErr != nil {
		panic(parseErr)
	}
	return attrs
}

// ValidateString reports whether cpeStr matches cpeRegex. It returns nil on a
// match, and otherwise an error that quotes both the input and the pattern.
//
// This filter exists because the facebook nvdtools parser can sometimes
// accept CPE strings that are not valid according to the official regex.
func ValidateString(cpeStr string) error {
	if cpeRegex.MatchString(cpeStr) {
		return nil
	}
	return fmt.Errorf("failed to parse Attributes=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString)
}

// newWithoutValidation parses cpeStr with wfn.Parse and returns the result as
// Attributes with every field passed through normalizeField. No regex
// validation is performed here. An error is returned when wfn.Parse fails or
// yields a nil result.
func newWithoutValidation(cpeStr string) (Attributes, error) {
	parsed, err := wfn.Parse(cpeStr)
	switch {
	case err != nil:
		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q: %w", cpeStr, err)
	case parsed == nil:
		return Attributes{}, fmt.Errorf("failed to parse Attributes=%q", cpeStr)
	}

	attrs := fromAttributes(*parsed)

	// store the raw (unescaped) values, since CPEs are also constructed in
	// other locations and need to be comparable with the parsed ones
	fields := []*string{
		&attrs.Vendor,
		&attrs.Product,
		&attrs.Language,
		&attrs.Version,
		&attrs.TargetSW,
		&attrs.Part,
		&attrs.Edition,
		&attrs.Other,
		&attrs.SWEdition,
		&attrs.TargetHW,
		&attrs.Update,
	}
	for _, field := range fields {
		*field = normalizeField(*field)
	}

	return attrs, nil
}

func normalizeField(field string) string {
	// replace spaces with underscores (per section 5.3.2 of the Attributes spec v 2.3)
	field = strings.ReplaceAll(field, " ", "_")

	// keep dashes and forward slashes unescaped
	if field == "*" {
		return Any
	}
	return stripSlashes(field)
}

// stripSlashes undoes the escaping applied by sanitize: every backslash that
// is immediately followed by one of the characters in allowedCPEPunctuation is
// dropped, and everything else is copied through. This allows CPEs with quoted
// characters to round trip through parsing correctly.
func stripSlashes(s string) string {
	var out strings.Builder
	for idx, r := range s {
		if r == '\\' {
			next := idx + 1
			if next < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[next])) {
				// skip only the backslash; the punctuation character itself
				// is written on the following iteration
				continue
			}
		}
		out.WriteRune(r)
	}
	return out.String()
}

// String escapes every field with sanitize and returns the BindToFmtString
// form of the escaped copy. The receiver itself is left unmodified.
func (c Attributes) String() string {
	escaped := Attributes{
		Part:      sanitize(c.Part),
		Vendor:    sanitize(c.Vendor),
		Product:   sanitize(c.Product),
		Version:   sanitize(c.Version),
		Update:    sanitize(c.Update),
		Edition:   sanitize(c.Edition),
		SWEdition: sanitize(c.SWEdition),
		TargetSW:  sanitize(c.TargetSW),
		TargetHW:  sanitize(c.TargetHW),
		Other:     sanitize(c.Other),
		Language:  sanitize(c.Language),
	}
	return escaped.BindToFmtString()
}

// sanitize is a modified version of the WFNize function from nvdtools. It
// replaces spaces with underscores and quotes every allowed punctuation
// character with a leading backslash.
//
// It deliberately does not reuse the upstream implementation: the
// "addSlashesAt" part of that function stops its loop as soon as it
// encounters ":", which is a valid character for a WFN attribute once quoted.
// As a result upstream truncates strings that contain ":", so a value like
// "prefix:1.2", which should be quoted as "prefix\:1.2", ends up as "prefix",
// losing information and producing incorrect CPEs.
func sanitize(s string) string {
	var escaped strings.Builder

	// spaces are replaced with underscores before escaping; the remaining
	// allowable punctuation is escaped per section 5.3.2 of the CPE 2.3 spec
	for _, r := range strings.ReplaceAll(s, " ", "_") {
		if needsEscape := strings.ContainsRune(allowedCPEPunctuation, r); needsEscape {
			escaped.WriteByte('\\')
		}
		escaped.WriteRune(r)
	}
	return escaped.String()
}