syft/syft/pkg/cpe.go

package pkg

import (
	"fmt"
	"regexp"
	"strings"

	"github.com/facebookincubator/nvdtools/wfn"
)

// CPE is an alias for wfn.Attributes (from nvdtools), so a CPE value exposes the same
// attribute fields (Part, Vendor, Product, Version, ...) and methods as that type.
type CPE = wfn.Attributes

const (
	// allowedCPEPunctuation is the set of punctuation characters that sanitize prefixes with a
	// backslash when building a CPE string, and whose preceding backslash stripSlashes removes
	// when parsing one.
	allowedCPEPunctuation = "-!\"#$%&'()+,./:;<=>@[]^`{|}~"
)

// cpeRegexString is the pattern used to validate CPE strings. It is taken from
// https://csrc.nist.gov/schema/cpe/2.3/cpe-naming_2.3.xsd, which has the official CPE spec.
// The first alternative matches CPE URIs ("cpe:/...") and the second alternative matches
// formatted binding strings ("cpe:2.3:...").
const cpeRegexString = ((`^([c][pP][eE]:/[AHOaho]?(:[A-Za-z0-9\._\-~%]*){0,6})`) +
	// Or match the CPE binding string.
	// Note that '`' had to be written as '\x60' because a backtick cannot appear inside a Go raw string literal.
	`|(cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&'\(\)\+,/:;<=>@\[\]\^\x60\{\|}~]))+(\?*|\*?))|[\*\-])){4})$`)

// cpeRegex is cpeRegexString compiled once at package initialization; it is used by
// ValidateCPEString.
var cpeRegex = regexp.MustCompile(cpeRegexString)

// NewCPE parses a formatted CPE string and returns the corresponding CPE object.
//
// Parsing is lenient: the string is first parsed without validation so that some input (such
// as attribute values containing whitespace) can be accepted on the caller's behalf. The
// parsed CPE is then rendered with CPEString (which sanitizes every attribute) and that
// rendering must pass ValidateCPEString.
//
// It returns a zero CPE and a non-nil error when the string cannot be parsed or when the
// sanitized rendering fails validation.
func NewCPE(cpeStr string) (CPE, error) {
	// get a CPE object based on the given string -- don't validate yet since it may be possible
	// to escape select cases on the caller's behalf
	c, err := newCPEWithoutValidation(cpeStr)
	if err != nil {
		return CPE{}, fmt.Errorf("unable to parse CPE string: %w", err)
	}

	// ensure that this CPE can be validated after being fully sanitized. The validation error
	// itself must be returned here: the parse error above is always nil at this point, and
	// returning it would report success for an invalid CPE.
	if err = ValidateCPEString(CPEString(c)); err != nil {
		return CPE{}, err
	}

	// we don't return the sanitized string, as this is a concern for later when creating CPE
	// strings. In fact, since sanitization is lossy (whitespace is replaced, not escaped) it's
	// important that the raw values are left as-is.
	return c, nil
}

// ValidateCPEString reports whether cpeStr matches the official CPE regex (cpeRegex).
// It returns nil on a match and a descriptive error otherwise.
func ValidateCPEString(cpeStr string) error {
	// The facebook nvdtools parser can sometimes incorrectly parse invalid CPE strings, so
	// every CPE that does not match the official regex is rejected here.
	if cpeRegex.MatchString(cpeStr) {
		return nil
	}
	return fmt.Errorf("failed to parse CPE=%q as it doesn't match the regex=%s", cpeStr, cpeRegexString)
}

// newCPEWithoutValidation parses cpeStr with wfn.Parse and normalizes every attribute of the
// result with normalizeCpeField. No regex validation is performed here.
// It returns a zero CPE and an error when wfn.Parse fails or yields a nil result.
func newCPEWithoutValidation(cpeStr string) (CPE, error) {
	parsed, err := wfn.Parse(cpeStr)
	switch {
	case err != nil:
		return CPE{}, fmt.Errorf("failed to parse CPE=%q: %w", cpeStr, err)
	case parsed == nil:
		return CPE{}, fmt.Errorf("failed to parse CPE=%q", cpeStr)
	}

	// we need to compare the raw data since we are constructing CPEs in other locations, so
	// every attribute is normalized in place.
	attributes := []*string{
		&parsed.Part,
		&parsed.Vendor,
		&parsed.Product,
		&parsed.Version,
		&parsed.Update,
		&parsed.Edition,
		&parsed.SWEdition,
		&parsed.TargetSW,
		&parsed.TargetHW,
		&parsed.Other,
		&parsed.Language,
	}
	for _, attr := range attributes {
		*attr = normalizeCpeField(*attr)
	}

	return *parsed, nil
}

// MustCPE is like NewCPE but panics with the returned error instead of returning it.
func MustCPE(cpeStr string) CPE {
	parsed, err := NewCPE(cpeStr)
	if err == nil {
		return parsed
	}
	panic(err)
}

func normalizeCpeField(field string) string {
	// replace spaces with underscores (per section 5.3.2 of the CPE spec v 2.3)
	field = strings.ReplaceAll(field, " ", "_")

	// keep dashes and forward slashes unescaped
	if field == "*" {
		return wfn.Any
	}
	return stripSlashes(field)
}

// stripSlashes undoes the escaping performed by sanitize: every backslash that is immediately
// followed by a character from allowedCPEPunctuation is dropped, and everything else
// (including a trailing backslash or a backslash before any other character) is kept.
// This allows CPEs with quoted characters to round trip through parsing correctly.
func stripSlashes(s string) string {
	var out strings.Builder
	for idx, r := range s {
		next := idx + 1
		// a backslash occupies one byte, so s[next] is the first byte of the following character
		escapesPunct := r == '\\' && next < len(s) && strings.ContainsRune(allowedCPEPunctuation, rune(s[next]))
		if !escapesPunct {
			out.WriteRune(r)
		}
	}
	return out.String()
}

// CPEString renders c as a CPE formatted binding string. Every attribute is passed through
// sanitize before the result is bound with BindToFmtString; c itself is not modified.
func CPEString(c CPE) string {
	escaped := CPE{
		Part:      sanitize(c.Part),
		Vendor:    sanitize(c.Vendor),
		Product:   sanitize(c.Product),
		Version:   sanitize(c.Version),
		Update:    sanitize(c.Update),
		Edition:   sanitize(c.Edition),
		SWEdition: sanitize(c.SWEdition),
		TargetSW:  sanitize(c.TargetSW),
		TargetHW:  sanitize(c.TargetHW),
		Other:     sanitize(c.Other),
		Language:  sanitize(c.Language),
	}
	return escaped.BindToFmtString()
}

// sanitize is a modified version of the WFNize function from nvdtools. It replaces spaces
// with underscores and quotes every character in allowedCPEPunctuation with a preceding
// backslash.
//
// It deliberately does not reuse the upstream implementation: the "addSlashesAt" part of the
// nvdtools function stops its loop as soon as it encounters ":", which is a valid character
// for a WFN attribute once quoted. As a result upstream truncates strings that contain ":",
// so a value like "prefix:1.2" (which should be quoted as "prefix\:1.2") ends up as "prefix",
// losing information and producing incorrect CPEs.
func sanitize(s string) string {
	var out strings.Builder
	for _, r := range s {
		switch {
		case r == ' ':
			// replace spaces with underscores
			out.WriteRune('_')
			continue
		case strings.ContainsRune(allowedCPEPunctuation, r):
			// escape allowable punctuation per section 5.3.2 in the CPE 2.3 spec
			out.WriteRune('\\')
		}
		out.WriteRune(r)
	}
	return out.String()
}