diff --git a/syft/format/cpes/decoder.go b/syft/format/cpes/decoder.go
new file mode 100644
index 000000000..b5d3ab62e
--- /dev/null
+++ b/syft/format/cpes/decoder.go
@@ -0,0 +1,114 @@
+package cpes
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/cpe"
+	"github.com/anchore/syft/syft/format/internal"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+)
+
+// ID is the format identifier reported for newline-delimited lists of CPEs.
+const ID sbom.FormatID = "cpes"
+
+// version is the format version reported by this decoder.
+const version = "1"
+
+var _ sbom.FormatDecoder = (*decoder)(nil)
+
+// decoder reads a list of CPEs, one per line, into an SBOM.
+type decoder struct{}
+
+// NewFormatDecoder returns a decoder for the "cpes" format.
+func NewFormatDecoder() sbom.FormatDecoder {
+	return decoder{}
+}
+
+// Decode reads one CPE per line from r and returns an SBOM with a package for each line that
+// parses as a CPE, along with the format ID and version. A nil reader is an error.
+func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) {
+	if r == nil {
+		return nil, "", "", fmt.Errorf("no reader provided")
+	}
+	s, err := toSyftModel(r)
+	return s, ID, version, err
+}
+
+// Identify reports the format ID and version when the first non-blank line of r
+// is a valid CPE, and empty strings otherwise (including when r is nil, has no
+// non-blank lines, or cannot be read).
+func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) {
+	if r == nil {
+		return "", ""
+	}
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" {
+			// skip whitespace only lines
+			continue
+		}
+
+		// only the first non-blank line decides the outcome
+		if err := cpe.ValidateString(line); err != nil {
+			return "", ""
+		}
+
+		return ID, version
+	}
+
+	return "", ""
+}
+
+// toSyftModel builds an SBOM with a package for each parseable CPE line in r.
+// Blank lines are ignored; lines that fail to parse are logged at debug level
+// and skipped. A failure of the underlying scanner (a read error or a line
+// exceeding the scanner's token limit) is returned together with the packages
+// collected up to that point.
+func toSyftModel(r io.Reader) (*sbom.SBOM, error) {
+	var errs []error
+	pkgs := pkg.NewCollection()
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" {
+			continue
+		}
+
+		// skip invalid CPEs
+		c, err := cpe.New(line, "")
+		if err != nil {
+			log.WithFields("error", err, "line", line).Debug("unable to parse cpe")
+			continue
+		}
+
+		p := pkg.Package{
+			Name:    c.Attributes.Product,
+			Version: c.Attributes.Version,
+			CPEs:    []cpe.CPE{c},
+		}
+
+		internal.Backfill(&p)
+		p.SetID()
+		pkgs.Add(p)
+	}
+
+	// Scan returning false is indistinguishable from end of input unless Err is checked
+	if err := scanner.Err(); err != nil {
+		errs = append(errs, fmt.Errorf("unable to read cpes: %w", err))
+	}
+
+	return &sbom.SBOM{
+		Artifacts: sbom.Artifacts{
+			Packages: pkgs,
+		},
+	}, errors.Join(errs...)
+}
diff --git a/syft/format/cpes/decoder_test.go b/syft/format/cpes/decoder_test.go
new file mode 100644
index 000000000..6ad14b966
--- /dev/null
+++ b/syft/format/cpes/decoder_test.go
@@ -0,0 +1,181 @@
+package cpes
+
+import (
+	"bufio"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/stretchr/testify/require"
+
+	"github.com/anchore/syft/syft/cpe"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+)
+
+func Test_CPEProvider(t *testing.T) {
+	tests := []struct {
+		name      string
+		userInput string
+		sbom      *sbom.SBOM
+	}{
+		{
+			name:      "takes a single cpe",
+			userInput: "cpe:/a:apache:log4j:2.14.1",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(pkg.Package{
+						Name:    "log4j",
+						Version: "2.14.1",
+						CPEs: []cpe.CPE{
+							cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
+						},
+					}),
+				},
+			},
+		},
+		{
+			name: "takes multiple cpes",
+			userInput: `cpe:/a:apache:log4j:2.14.1
+			cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;
+			cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;
+			cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;`,
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(
+						pkg.Package{
+							Name:    "log4j",
+							Version: "2.14.1",
+							CPEs: []cpe.CPE{
+								cpe.Must("cpe:/a:apache:log4j:2.14.1", ""),
+							},
+						},
+						pkg.Package{
+							Name:    "nginx",
+							Version: "",
+							CPEs: []cpe.CPE{
+								cpe.Must("cpe:2.3:a:f5:nginx:*:*:*:*:*:*:*:*;", ""),
+							},
+						},
+						pkg.Package{
+							Name:    "nginx",
+							Version: "0.5.2",
+							CPEs: []cpe.CPE{
+								cpe.Must("cpe:2.3:a:f5:nginx:0.5.2:*:*:*:*:*:*:*;", ""),
+							},
+						},
+						pkg.Package{
+							Name:    "nginx",
+							Version: "0.5.3",
+							CPEs: []cpe.CPE{
+								cpe.Must("cpe:2.3:a:f5:nginx:0.5.3:*:*:*:*:*:*:*;", ""),
+							},
+						},
+					),
+				},
+			},
+		},
+		{
+			name:      "takes cpe with no version",
+			userInput: "cpe:/a:apache:log4j",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(pkg.Package{
+						Name: "log4j",
+						CPEs: []cpe.CPE{
+							cpe.Must("cpe:/a:apache:log4j", ""),
+						},
+					}),
+				},
+			},
+		},
+		{
+			name:      "takes CPE 2.3 format",
+			userInput: "cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(pkg.Package{
+						Name:    "log4j",
+						Version: "2.14.1",
+						CPEs: []cpe.CPE{
+							cpe.Must("cpe:2.3:a:apache:log4j:2.14.1:*:*:*:*:*:*:*", ""),
+						},
+					}),
+				},
+			},
+		},
+		{
+			name:      "deduces target SW from CPE - known target_sw",
+			userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(pkg.Package{
+						Name: "opensearch",
+						Type: pkg.GemPkg,
+						CPEs: []cpe.CPE{
+							cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
+						},
+					}),
+				},
+			},
+		},
+		{
+			name:      "handles unknown target_sw CPE field",
+			userInput: "cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(pkg.Package{
+						Name: "opensearch",
+						Type: "",
+						CPEs: []cpe.CPE{
+							cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:loremipsum:*:*", ""),
+						},
+					}),
+				},
+			},
+		},
+		{
+			name:      "invalid prefix",
+			userInput: "dir:test-fixtures/cpe",
+			sbom: &sbom.SBOM{
+				Artifacts: sbom.Artifacts{
+					Packages: pkg.NewCollection(),
+				},
+			},
+		},
+	}
+
+	syftPkgOpts := []cmp.Option{
+		cmpopts.IgnoreFields(pkg.Package{}, "id", "Language"),
+		cmpopts.IgnoreUnexported(pkg.Package{}, file.LocationSet{}, pkg.LicenseSet{}),
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			dec := NewFormatDecoder()
+
+			decodedSBOM, _, _, err := dec.Decode(strings.NewReader(tc.userInput))
+			require.NoError(t, err)
+
+			gotSyftPkgs := decodedSBOM.Artifacts.Packages.Sorted()
+			wantSyftPkgs := tc.sbom.Artifacts.Packages.Sorted()
+			require.Len(t, gotSyftPkgs, len(wantSyftPkgs))
+			for idx, wantPkg := range wantSyftPkgs {
+				if d := cmp.Diff(wantPkg, gotSyftPkgs[idx], syftPkgOpts...); d != "" {
+					t.Errorf("unexpected Syft Package (-want +got):\n%s", d)
+				}
+			}
+		})
+	}
+}
+
+// Test_Decode_lineTooLong ensures a scanner failure is reported rather than being treated as end of input.
+func Test_Decode_lineTooLong(t *testing.T) {
+	// a single line larger than the scanner's token limit cannot be read
+	input := strings.Repeat("a", 2*bufio.MaxScanTokenSize)
+
+	_, _, _, err := NewFormatDecoder().Decode(strings.NewReader(input))
+	require.ErrorIs(t, err, bufio.ErrTooLong)
+}
diff --git a/syft/format/decoders.go b/syft/format/decoders.go
index 6ca1f94a2..48dba03e9 100644
--- a/syft/format/decoders.go
+++ b/syft/format/decoders.go
@@ -3,6 +3,7 @@ package format
 import (
 	"io"
 
+	"github.com/anchore/syft/syft/format/cpes"
 	"github.com/anchore/syft/syft/format/cyclonedxjson"
 	"github.com/anchore/syft/syft/format/cyclonedxxml"
 	"github.com/anchore/syft/syft/format/purls"
@@ -26,6 +27,7 @@ func Decoders() []sbom.FormatDecoder {
 		spdxtagvalue.NewFormatDecoder(),
 		spdxjson.NewFormatDecoder(),
 		purls.NewFormatDecoder(),
+		cpes.NewFormatDecoder(),
 	}
 }
 
diff --git a/syft/format/internal/backfill.go b/syft/format/internal/backfill.go
index 6e5544b95..230aac18a 100644
--- a/syft/format/internal/backfill.go
+++ b/syft/format/internal/backfill.go
@@ -10,13 +10,35 @@ import (
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/cpe"
 	"github.com/anchore/syft/syft/pkg"
+	cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
 )
 
 // Backfill takes all information present in the package and attempts to fill in any missing information
-// from any available sources, such as the Metadata and PURL.
+// from any available sources, such as the Metadata, PURL, or CPEs.
 //
 // Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date
 func Backfill(p *pkg.Package) {
+	backfillFromPurl(p)
+	backfillFromCPE(p)
+}
+
+// backfillFromCPE fills in the package type from the target software of the first CPE, when the type
+// is not already set. Packages without CPEs are left untouched.
+func backfillFromCPE(p *pkg.Package) {
+	if len(p.CPEs) == 0 {
+		return
+	}
+
+	// only the first CPE is consulted
+	c := p.CPEs[0]
+
+	if p.Type == "" {
+		p.Type = cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW)
+	}
+}
+
+// backfillFromPurl holds the PURL-based backfill that previously lived in Backfill; it is a no-op without a PURL.
+func backfillFromPurl(p *pkg.Package) {
 	if p.PURL == "" {
 		return
 	}
diff --git a/syft/format/internal/backfill_test.go b/syft/format/internal/backfill_test.go
index 7e396e2b5..79918d6e9 100644
--- a/syft/format/internal/backfill_test.go
+++ b/syft/format/internal/backfill_test.go
@@ -121,6 +121,20 @@ func Test_Backfill(t *testing.T) {
 				Metadata: pkg.JavaArchive{},
 			},
 		},
+		{
+			name: "target-sw from CPE",
+			in: pkg.Package{
+				CPEs: []cpe.CPE{
+					cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
+				},
+			},
+			expected: pkg.Package{
+				CPEs: []cpe.CPE{
+					cpe.Must("cpe:2.3:a:amazon:opensearch:*:*:*:*:*:ruby:*:*", ""),
+				},
+				Type: pkg.GemPkg,
+			},
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
diff --git a/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go b/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go
new file mode 100644
index 000000000..d3846f47b
--- /dev/null
+++ b/syft/pkg/cataloger/common/cpe/target_software_to_pkg_type.go
@@ -0,0 +1,64 @@
+package cpe
+
+import (
+	"strings"
+
+	"github.com/anchore/syft/syft/pkg"
+)
+
+// targetSoftwareNormalizer rewrites "-" and " " to "_" so that spelling variants of a target_sw value
+// compare equal. It is built once because a strings.Replacer is safe for concurrent use.
+var targetSoftwareNormalizer = strings.NewReplacer("-", "_", " ", "_")
+
+// TargetSoftwareToPackageType maps a CPE target_sw value to a package type. The value is lower-cased and
+// has "-" and " " replaced with "_" before matching; an empty type is returned when nothing matches.
+//
+// The mapping is derived from looking at target_software attributes in the NVD dataset
+// TODO: ideally this would be driven from the store, where we can resolve ecosystem aliases directly
+func TargetSoftwareToPackageType(tsw string) pkg.Type {
+	switch targetSoftwareNormalizer.Replace(strings.ToLower(tsw)) {
+	case "alpine", "apk":
+		return pkg.ApkPkg
+	case "debian", "dpkg":
+		return pkg.DebPkg
+	case "java", "maven", "ant", "gradle", "jenkins", "jenkins_ci", "kafka", "logstash", "mule", "nifi", "solr", "spark", "storm", "struts", "tomcat", "zookeeper", "log4j":
+		return pkg.JavaPkg
+	case "javascript", "node", "nodejs", "node.js", "npm", "yarn", "apache", "jquery", "next.js", "prismjs":
+		return pkg.NpmPkg
+	case "c", "c++", "c/c++", "conan", "gnu_c++", "qt":
+		return pkg.ConanPkg
+	case "dart":
+		return pkg.DartPubPkg
+	case "redhat", "rpm", "redhat_enterprise_linux", "rhel", "suse", "suse_linux", "opensuse", "opensuse_linux", "fedora", "centos", "oracle_linux", "ol":
+		return pkg.RpmPkg
+	case "elixir", "hex":
+		return pkg.HexPkg
+	case "erlang":
+		return pkg.ErlangOTPPkg
+	case ".net", ".net_framework", "asp", "asp.net", "dotnet", "dotnet_framework", "c#", "csharp", "nuget":
+		return pkg.DotnetPkg
+	case "ruby", "gem", "nokogiri", "ruby_on_rails":
+		return pkg.GemPkg
+	case "rust", "cargo", "crates":
+		return pkg.RustPkg
+	case "python", "pip", "pypi", "flask":
+		return pkg.PythonPkg
+	case "kb", "knowledgebase", "msrc", "mskb", "microsoft":
+		return pkg.KbPkg
+	case "portage", "gentoo":
+		return pkg.PortagePkg
+	case "go", "golang", "gomodule":
+		return pkg.GoModulePkg
+	case "linux_kernel", "linux", "z/linux":
+		return pkg.LinuxKernelPkg
+	case "php":
+		return pkg.PhpComposerPkg
+	case "swift":
+		return pkg.SwiftPkg
+	case "wordpress", "wordpress_plugin", "wordpress_":
+		return pkg.WordpressPluginPkg
+	case "lua", "luarocks":
+		return pkg.LuaRocksPkg
+	}
+	return ""
+}