mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 08:53:15 +01:00
add render unicode in gemspec parser
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
1f0f6fa3e5
commit
46c74865e5
@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/mitchellh/mapstructure"
|
||||
@ -62,18 +63,15 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
// TODO: sanitize unicode? (see engine code)
|
||||
sanitizedLine := strings.TrimSpace(line)
|
||||
sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "")
|
||||
sanitizedLine = renderUtf8(sanitizedLine)
|
||||
|
||||
if sanitizedLine == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
for field, pattern := range patterns {
|
||||
if strings.Contains(sanitizedLine, "licenses") {
|
||||
println("Found it.")
|
||||
}
|
||||
matchMap := matchCaptureGroups(pattern, sanitizedLine)
|
||||
if value := matchMap[field]; value != "" {
|
||||
if postProcessor := postProcessors[field]; postProcessor != nil {
|
||||
@ -106,6 +104,49 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
|
||||
return pkgs, nil
|
||||
}
|
||||
|
||||
// renderUtf8 takes any string escaped string sub-sections from the ruby string and replaces those sections with the UTF8 runes.
|
||||
func renderUtf8(s string) string {
|
||||
pattern := regexp.MustCompile(`\\u(?P<unicode>[0-9A-F]{4,8})`)
|
||||
fullReplacement := replaceAllStringSubmatchFunc(pattern, s, func(unicodeSection []string) string {
|
||||
replacement := ""
|
||||
if len(unicodeSection) == 1 {
|
||||
return unicodeSection[0]
|
||||
}
|
||||
for idx, m := range unicodeSection {
|
||||
if idx == 0 {
|
||||
continue
|
||||
}
|
||||
value, err := strconv.ParseInt(m, 16, 64)
|
||||
if err != nil {
|
||||
// TODO: log?
|
||||
panic(err)
|
||||
//return unicodeSection[0]
|
||||
}
|
||||
replacement = strings.ReplaceAll(unicodeSection[0], "\\u"+m, string(rune(value)))
|
||||
}
|
||||
return replacement
|
||||
})
|
||||
return fullReplacement
|
||||
}
|
||||
|
||||
// replaceAllStringSubmatchFunc finds and replaces the given capture groups from the
|
||||
func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string {
|
||||
result := ""
|
||||
lastIndex := 0
|
||||
|
||||
for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) {
|
||||
var groups []string
|
||||
for i := 0; i < len(v); i += 2 {
|
||||
groups = append(groups, str[v[i]:v[i+1]])
|
||||
}
|
||||
|
||||
result += str[lastIndex:v[0]] + repl(groups)
|
||||
lastIndex = v[1]
|
||||
}
|
||||
|
||||
return result + str[lastIndex:]
|
||||
}
|
||||
|
||||
// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
|
||||
func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
|
||||
match := regEx.FindStringSubmatch(str)
|
||||
|
||||
@ -16,9 +16,9 @@ func TestParseGemspec(t *testing.T) {
|
||||
Licenses: []string{"MIT"},
|
||||
Language: pkg.Ruby,
|
||||
Metadata: pkg.GemMetadata{
|
||||
Name: "bundler",
|
||||
Version: "2.1.4",
|
||||
Files: []string{"exe/bundle", "exe/bundler"},
|
||||
Name: "bundler",
|
||||
Version: "2.1.4",
|
||||
Files: []string{"exe/bundle", "exe/bundler"},
|
||||
Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"},
|
||||
Licenses: []string{"MIT"},
|
||||
},
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user