fix: improvements to known CPE index construction (#2801)

* fix: stop pre-filtering potential known CPE URLs

Previously, when building the known CPE index, there was logic to
de-duplicate processing based on the normalized CPE name; however, this
meant that a significant number of known CPEs didn't get indexed because
the first instance of that name didn't have a supported collection URL
but a later one did. This isn't code that executes at runtime in syft, so
de-duplicating the processing for performance isn't really necessary
here, and removing it doesn't add much to the total runtime anyway.

Signed-off-by: Weston Steimel <commits@weston.slmail.me>

* fix: CPE index builder should extract and consider all reference URLs

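Previously, the struct definition for CpeItem caused only the last URL
reference in the list to be kept and processed for inclusion in the
index. The definition now models a single references element holding a
slice of reference entries, so every URL is considered.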

Signed-off-by: Weston Steimel <commits@weston.slmail.me>

---------

Signed-off-by: Weston Steimel <commits@weston.slmail.me>
This commit is contained in:
Weston Steimel 2024-04-23 14:28:18 +01:00 committed by GitHub
parent f7d3d552ce
commit 891e61a2ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 700 additions and 22 deletions

View File

@ -52,11 +52,9 @@ func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) {
func filterCpeList(cpeList CpeList) CpeList { func filterCpeList(cpeList CpeList) CpeList {
var processedCpeList CpeList var processedCpeList CpeList
seen := make(map[string]struct{})
for _, cpeItem := range cpeList.CpeItems { for _, cpeItem := range cpeList.CpeItems {
// Skip CPE items that don't have any references. // Skip CPE items that don't have any references.
if len(cpeItem.References) == 0 { if len(cpeItem.References.Reference) == 0 {
continue continue
} }
@ -64,6 +62,7 @@ func filterCpeList(cpeList CpeList) CpeList {
parsedName, err := wfn.Parse(cpeItem.Name) parsedName, err := wfn.Parse(cpeItem.Name)
if err != nil { if err != nil {
log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err) log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err)
continue
} }
if slices.Contains([]string{"h", "o"}, parsedName.Part) { if slices.Contains([]string{"h", "o"}, parsedName.Part) {
@ -71,15 +70,12 @@ func filterCpeList(cpeList CpeList) CpeList {
} }
normalizedName := normalizeCPE(parsedName).BindToURI() normalizedName := normalizeCPE(parsedName).BindToURI()
if _, ok := seen[normalizedName]; ok {
continue
}
seen[normalizedName] = struct{}{}
cpeItem.Name = normalizedName cpeItem.Name = normalizedName
parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name) parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name)
if err != nil { if err != nil {
log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err) log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err)
continue
} }
cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString() cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString()
@ -123,8 +119,8 @@ func indexCPEList(list CpeList) *dictionary.Indexed {
for _, cpeItem := range list.CpeItems { for _, cpeItem := range list.CpeItems {
cpeItemName := cpeItem.Cpe23Item.Name cpeItemName := cpeItem.Cpe23Item.Name
for _, reference := range cpeItem.References { for _, reference := range cpeItem.References.Reference {
ref := reference.Reference.Href ref := reference.Href
switch { switch {
case strings.HasPrefix(ref, prefixForNPMPackages): case strings.HasPrefix(ref, prefixForNPMPackages):

View File

@ -3,8 +3,8 @@ package main
type CpeItem struct { type CpeItem struct {
Name string `xml:"name,attr"` Name string `xml:"name,attr"`
Title string `xml:"title"` Title string `xml:"title"`
References []struct { References struct {
Reference struct { Reference []struct {
Href string `xml:"href,attr"` Href string `xml:"href,attr"`
Body string `xml:",chardata"` Body string `xml:",chardata"`
} `xml:"reference"` } `xml:"reference"`

View File

@ -5,12 +5,15 @@
}, },
"npm": { "npm": {
"merge-recursive": "cpe:2.3:a:umbraengineering:merge-recursive:*:*:*:*:*:node.js:*:*", "merge-recursive": "cpe:2.3:a:umbraengineering:merge-recursive:*:*:*:*:*:node.js:*:*",
"ps": "cpe:2.3:a:umbraengineering:ps:*:*:*:*:*:node.js:*:*",
"static-dev-server": "cpe:2.3:a:static-dev-server_project:static-dev-server:*:*:*:*:*:node.js:*:*", "static-dev-server": "cpe:2.3:a:static-dev-server_project:static-dev-server:*:*:*:*:*:node.js:*:*",
"umount": "cpe:2.3:a:umount_project:umount:*:*:*:*:*:node.js:*:*", "umount": "cpe:2.3:a:umount_project:umount:*:*:*:*:*:node.js:*:*",
"undefsafe": "cpe:2.3:a:undefsafe_project:undefsafe:*:*:*:*:*:node.js:*:*", "undefsafe": "cpe:2.3:a:undefsafe_project:undefsafe:*:*:*:*:*:node.js:*:*",
"underscore": "cpe:2.3:a:underscorejs:underscore:*:*:*:*:*:node.js:*:*", "underscore": "cpe:2.3:a:underscorejs:underscore:*:*:*:*:*:node.js:*:*",
"underscore-99xp": "cpe:2.3:a:underscore-99xp_project:underscore-99xp:*:*:*:*:*:node.js:*:*", "underscore-99xp": "cpe:2.3:a:underscore-99xp_project:underscore-99xp:*:*:*:*:*:node.js:*:*",
"ungit": "cpe:2.3:a:ungit_project:ungit:*:*:*:*:*:node.js:*:*",
"unicode": "cpe:2.3:a:unicode_project:unicode:*:*:*:*:*:node.js:*:*", "unicode": "cpe:2.3:a:unicode_project:unicode:*:*:*:*:*:node.js:*:*",
"unicode-json": "cpe:2.3:a:unicode:unicode-json:*:*:*:*:*:node.js:*:*",
"unicorn-list": "cpe:2.3:a:unicorn-list_project:unicorn-list:*:*:*:*:*:node.js:*:*" "unicorn-list": "cpe:2.3:a:unicorn-list_project:unicorn-list:*:*:*:*:*:node.js:*:*"
}, },
"php_pear": { "php_pear": {
@ -23,6 +26,9 @@
"memcached": "cpe:2.3:a:php:memcached:*:*:*:*:*:*:*:*", "memcached": "cpe:2.3:a:php:memcached:*:*:*:*:*:*:*:*",
"xhprof": "cpe:2.3:a:php:xhprof:*:*:*:*:*:*:*:*" "xhprof": "cpe:2.3:a:php:xhprof:*:*:*:*:*:*:*:*"
}, },
"pypi": {
"vault-cli": "cpe:2.3:a:ukg:vault-cli:*:*:*:*:*:python:*:*"
},
"rubygems": { "rubygems": {
"openssl": "cpe:2.3:a:ruby-lang:openssl:*:*:*:*:*:*:*:*" "openssl": "cpe:2.3:a:ruby-lang:openssl:*:*:*:*:*:*:*:*"
}, },

View File

@ -24972,6 +24972,7 @@
<references> <references>
<reference href="http://pear.php.net/package/HTML_QuickForm/">Project</reference> <reference href="http://pear.php.net/package/HTML_QuickForm/">Project</reference>
<reference href="http://pear.php.net/package/HTML_QuickForm/download">Version</reference> <reference href="http://pear.php.net/package/HTML_QuickForm/download">Version</reference>
<reference href="http://some-other-unrelated-reference">Test</reference>
</references> </references>
<cpe-23:cpe23-item name="cpe:2.3:a:html_quickform_project:html_quickform:2.0:*:*:*:*:*:*:*"/> <cpe-23:cpe23-item name="cpe:2.3:a:html_quickform_project:html_quickform:2.0:*:*:*:*:*:*:*"/>
</cpe-item> </cpe-item>
@ -24980,6 +24981,7 @@
<references> <references>
<reference href="http://pear.php.net/package/HTML_QuickForm/">Project</reference> <reference href="http://pear.php.net/package/HTML_QuickForm/">Project</reference>
<reference href="http://pear.php.net/package/HTML_QuickForm/download">Version</reference> <reference href="http://pear.php.net/package/HTML_QuickForm/download">Version</reference>
<reference href="http://some-other-unrelated-reference">Test</reference>
</references> </references>
<cpe-23:cpe23-item name="cpe:2.3:a:html_quickform_project:html_quickform:2.1:*:*:*:*:*:*:*"/> <cpe-23:cpe23-item name="cpe:2.3:a:html_quickform_project:html_quickform:2.1:*:*:*:*:*:*:*"/>
</cpe-item> </cpe-item>