feat: CPEs format decoder (#4207)

Signed-off-by: Adam Chovanec <git@adamchovanec.cz>
This commit is contained in:
Adam Chovanec 2025-11-12 16:45:09 +01:00 committed by GitHub
parent 66c78d44af
commit 102d362daf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 359 additions and 1 deletions

View File

@ -0,0 +1,95 @@
package cpes
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/format/internal"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
const (
	// ID is the format identifier reported for a newline-separated list of CPEs.
	ID sbom.FormatID = "cpes"

	// version is the format version string returned alongside ID by Decode and Identify.
	version = "1"
)

// compile-time check that decoder implements sbom.FormatDecoder.
var _ sbom.FormatDecoder = (*decoder)(nil)

// decoder is a stateless sbom.FormatDecoder for CPE lists.
type decoder struct{}
// NewFormatDecoder returns a decoder for the CPE list format. The decoder
// carries no state, so its zero value is returned.
func NewFormatDecoder() sbom.FormatDecoder {
	var d decoder
	return d
}
// Decode reads CPEs from r and converts them into an SBOM. On a nil reader it
// returns an error with an empty format ID and version; otherwise it always
// reports ID and version together with whatever toSyftModel produced.
func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) {
	if r != nil {
		model, err := toSyftModel(r)
		return model, ID, version, err
	}
	return nil, "", "", fmt.Errorf("no reader provided")
}
// Identify reports whether r looks like a CPE list. Only the first line that
// is not blank after trimming is inspected: if it validates as a CPE, ID and
// version are returned, otherwise (or when r is nil or has no such line) both
// results are empty.
func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
	if r == nil {
		return "", ""
	}

	lines := bufio.NewScanner(r)
	for lines.Scan() {
		candidate := strings.TrimSpace(lines.Text())
		if candidate == "" {
			// whitespace-only lines carry no signal, keep looking
			continue
		}

		// the first meaningful line decides the outcome either way
		if cpe.ValidateString(candidate) == nil {
			return ID, version
		}
		return "", ""
	}

	return "", ""
}
// toSyftModel reads r line by line and builds an SBOM containing one package
// per parseable CPE. Blank lines are skipped, and lines that fail to parse as
// a CPE are logged at debug level and skipped rather than treated as errors.
//
// The returned SBOM holds every package collected before reading stopped. The
// returned error is non-nil when the scanner itself failed (for example an
// I/O error or a line exceeding the scanner's buffer), in which case the SBOM
// may be incomplete.
func toSyftModel(r io.Reader) (*sbom.SBOM, error) {
	var errs []error
	pkgs := pkg.NewCollection()

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		// skip invalid CPEs
		c, err := cpe.New(line, "")
		if err != nil {
			log.WithFields("error", err, "line", line).Debug("unable to parse cpe")
			continue
		}

		p := pkg.Package{
			Name:    c.Attributes.Product,
			Version: c.Attributes.Version,
			CPEs:    []cpe.CPE{c},
		}

		// fill in remaining fields before computing the ID, which must come last
		internal.Backfill(&p)
		p.SetID()
		pkgs.Add(p)
	}

	// Scan returns false both at EOF and on failure; surface the failure so a
	// truncated read is not mistaken for a complete one.
	if err := scanner.Err(); err != nil {
		errs = append(errs, fmt.Errorf("unable to read cpes: %w", err))
	}

	return &sbom.SBOM{
		Artifacts: sbom.Artifacts{
			Packages: pkgs,
		},
	}, errors.Join(errs...)
}

View File

@ -0,0 +1,171 @@
package cpes
import (
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom"
)
// Test_CPEProvider decodes CPE list inputs with the format decoder and checks
// that the resulting packages match the expected ones (ignoring package ID,
// Language, and unexported fields).
func Test_CPEProvider(t *testing.T) {
	tests := []struct {
		name      string
		userInput string
		sbom      *sbom.SBOM
	}{
		{
			name:      "takes a single cpe",
			userInput: "cpe:/a:apache:log4j:2.14.1",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkg.Package{
						Name:    "log4j",
						Version: "2.14.1",
						CPEs: []cpe.CPE{
							cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
						},
					}),
				},
			},
		},
		{
			name: "takes multiple cpes",
			userInput: `cpe:/a:apache:log4j:2.14.1
cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;
cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;`,
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(
						pkg.Package{
							Name:    "log4j",
							Version: "2.14.1",
							CPEs: []cpe.CPE{
								cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
							},
						},
						pkg.Package{
							Name:    "nginx",
							Version: "",
							CPEs: []cpe.CPE{
								cpe.Must("cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;", ""),
							},
						},
						pkg.Package{
							Name:    "nginx",
							Version: "0.5.2",
							CPEs: []cpe.CPE{
								cpe.Must("cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;", ""),
							},
						},
						pkg.Package{
							Name:    "nginx",
							Version: "0.5.3",
							CPEs: []cpe.CPE{
								cpe.Must("cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;", ""),
							},
						},
					),
				},
			},
		},
		{
			name:      "takes cpe with no version",
			userInput: "cpe:/a:apache:log4j",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkg.Package{
						Name: "log4j",
						CPEs: []cpe.CPE{
							cpe.Must("cpe:/a:apache:log4j", ""),
						},
					}),
				},
			},
		},
		{
			name:      "takes CPE 2.3 format",
			userInput: "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkg.Package{
						Name:    "log4j",
						Version: "2.14.1",
						CPEs: []cpe.CPE{
							cpe.Must("cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", ""),
						},
					}),
				},
			},
		},
		{
			name:      "deduces target SW from CPE - known target_sw",
			userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkg.Package{
						Name: "opensearch",
						Type: pkg.GemPkg,
						CPEs: []cpe.CPE{
							cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
						},
					}),
				},
			},
		},
		{
			name:      "handles unknown target_sw CPE field",
			userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkg.Package{
						Name: "opensearch",
						Type: "",
						CPEs: []cpe.CPE{
							cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", ""),
						},
					}),
				},
			},
		},
		{
			// a line that is not a CPE is skipped, yielding an empty collection and no error
			name:      "invalid prefix",
			userInput: "dir:test-fixtures/cpe",
			sbom: &sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(),
				},
			},
		},
	}

	syftPkgOpts := []cmp.Option{
		cmpopts.IgnoreFields(pkg.Package{}, "id", "Language"),
		cmpopts.IgnoreUnexported(pkg.Package{}, file.LocationSet{}, pkg.LicenseSet{}),
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			dec := NewFormatDecoder()

			decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tc.userInput))
			require.NoError(t, err)

			gotSyftPkgs := decodedSBOM.Artifacts.Packages.Sorted()
			wantSyftPkgs := tc.sbom.Artifacts.Packages.Sorted()

			// the lengths must match before indexing into gotSyftPkgs below
			require.Len(t, gotSyftPkgs, len(wantSyftPkgs))

			for idx, wantPkg := range wantSyftPkgs {
				if d := cmp.Diff(wantPkg, gotSyftPkgs[idx], syftPkgOpts...); d != "" {
					t.Errorf("unexpected Syft Package (-want +got):\n%s", d)
				}
			}
		})
	}
}

View File

@ -3,6 +3,7 @@ package format
import (
"io"
"github.com/anchore/syft/syft/format/cpes"
"github.com/anchore/syft/syft/format/cyclonedxjson"
"github.com/anchore/syft/syft/format/cyclonedxxml"
"github.com/anchore/syft/syft/format/purls"
@ -26,6 +27,7 @@ func Decoders() []sbom.FormatDecoder {
spdxtagvalue.NewFormatDecoder(),
spdxjson.NewFormatDecoder(),
purls.NewFormatDecoder(),
cpes.NewFormatDecoder(),
}
}

View File

@ -10,13 +10,31 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cpe"
"github.com/anchore/syft/syft/pkg"
cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
)
// Backfill takes all information present in the package and attempts to fill in any missing information
// from any available sources, such as the Metadata, PURL, or CPEs.
//
// The PURL is consulted before the CPEs. backfillFromCPE only sets the package type when it is still
// empty, so a type that is already set by the time it runs is left untouched.
//
// Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date
func Backfill(p *pkg.Package) {
	backfillFromPurl(p)
	backfillFromCPE(p)
}
// backfillFromCPE derives the package type from the target software attribute
// of the package's first CPE. It does nothing when the package has no CPEs or
// already has a type.
func backfillFromCPE(p *pkg.Package) {
	if len(p.CPEs) == 0 || p.Type != "" {
		return
	}

	// only the first CPE is considered
	targetSW := p.CPEs[0].Attributes.TargetSW
	p.Type = cataloger.TargetSoftwareToPackageType(targetSW)
}
func backfillFromPurl(p *pkg.Package) {
if p.PURL == "" {
return
}

View File

@ -121,6 +121,20 @@ func Test_Backfill(t *testing.T) {
Metadata: pkg.JavaArchive{},
},
},
{
name: "target-sw from CPE",
in: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
},
expected: pkg.Package{
CPEs: []cpe.CPE{
cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
},
Type: pkg.GemPkg,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

View File

@ -0,0 +1,58 @@
package cpe
import (
"strings"
"github.com/anchore/syft/syft/pkg"
)
// TargetSoftwareToPackageType maps a CPE target_software value to a package
// type. The mapping is derived from looking at target_software attributes in
// the NVD dataset. The input is lowercased and has "-" and " " replaced with
// "_" before matching; values with no known mapping yield the empty type.
// TODO: ideally this would be driven from the store, where we can resolve ecosystem aliases directly
func TargetSoftwareToPackageType(tsw string) pkg.Type {
	lowered := strings.ToLower(tsw)
	key := strings.NewReplacer("-", "_", " ", "_").Replace(lowered)

	var result pkg.Type
	switch key {
	case "alpine", "apk":
		result = pkg.ApkPkg
	case "debian", "dpkg":
		result = pkg.DebPkg
	case "java", "maven", "ant", "gradle",
		"jenkins", "jenkins_ci", "kafka", "logstash",
		"mule", "nifi", "solr", "spark",
		"storm", "struts", "tomcat", "zookeeper",
		"log4j":
		result = pkg.JavaPkg
	case "javascript", "node", "nodejs", "node.js",
		"npm", "yarn", "apache", "jquery",
		"next.js", "prismjs":
		result = pkg.NpmPkg
	case "c", "c++", "c/c++", "conan", "gnu_c++", "qt":
		result = pkg.ConanPkg
	case "dart":
		result = pkg.DartPubPkg
	case "redhat", "rpm", "redhat_enterprise_linux", "rhel",
		"suse", "suse_linux", "opensuse", "opensuse_linux",
		"fedora", "centos", "oracle_linux", "ol":
		result = pkg.RpmPkg
	case "elixir", "hex":
		result = pkg.HexPkg
	case "erlang":
		result = pkg.ErlangOTPPkg
	case ".net", ".net_framework", "asp", "asp.net",
		"dotnet", "dotnet_framework", "c#", "csharp",
		"nuget":
		result = pkg.DotnetPkg
	case "ruby", "gem", "nokogiri", "ruby_on_rails":
		result = pkg.GemPkg
	case "rust", "cargo", "crates":
		result = pkg.RustPkg
	case "python", "pip", "pypi", "flask":
		result = pkg.PythonPkg
	case "kb", "knowledgebase", "msrc", "mskb", "microsoft":
		result = pkg.KbPkg
	case "portage", "gentoo":
		result = pkg.PortagePkg
	case "go", "golang", "gomodule":
		result = pkg.GoModulePkg
	case "linux_kernel", "linux", "z/linux":
		result = pkg.LinuxKernelPkg
	case "php":
		result = pkg.PhpComposerPkg
	case "swift":
		result = pkg.SwiftPkg
	case "wordpress", "wordpress_plugin", "wordpress_":
		result = pkg.WordpressPluginPkg
	case "lua", "luarocks":
		result = pkg.LuaRocksPkg
	default:
		result = ""
	}
	return result
}