add render unicode in gemspec parser

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-07 11:19:29 -04:00 committed by Toure Dunnon
parent 1f0f6fa3e5
commit 46c74865e5
2 changed files with 48 additions and 7 deletions

View File

@ -5,6 +5,7 @@ import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
"github.com/mitchellh/mapstructure"
@ -62,18 +63,15 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
for scanner.Scan() {
line := scanner.Text()
// TODO: sanitize unicode? (see engine code)
sanitizedLine := strings.TrimSpace(line)
sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "")
sanitizedLine = renderUtf8(sanitizedLine)
if sanitizedLine == "" {
continue
}
for field, pattern := range patterns {
if strings.Contains(sanitizedLine, "licenses") {
println("Found it.")
}
matchMap := matchCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil {
@ -106,6 +104,49 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
return pkgs, nil
}
// unicodeEscapePattern matches a ruby-style `\uXXXX` escape sequence: a literal backslash, a "u", and exactly
// four hexadecimal digits in either case. The digit count is fixed so that hex-looking letters that merely
// follow an escape (e.g. the "D" in `\u00C9DITH`) are not swallowed into the code point.
var unicodeEscapePattern = regexp.MustCompile(`\\u[0-9A-Fa-f]{4}`)

// renderUtf8 replaces every `\uXXXX` escape sequence found in the given ruby string with the UTF-8 encoded
// rune it names. Any text outside of an escape sequence is returned unchanged, and a string without escape
// sequences is returned as-is.
func renderUtf8(s string) string {
	return unicodeEscapePattern.ReplaceAllStringFunc(s, func(escape string) string {
		// skip the leading `\u` so that only the hex digits are parsed
		codePoint, err := strconv.ParseUint(escape[2:], 16, 32)
		if err != nil {
			// the pattern only admits hex digits, so this is not expected; keep the original text
			// instead of panicking while parsing user-provided files
			return escape
		}
		return string(rune(codePoint))
	})
}
// replaceAllStringSubmatchFunc replaces every match of re within str with the value returned by repl.
// For each match, repl is called with the matched text at index 0 followed by the text of each capture
// group (in pattern order). A capture group that did not participate in the match is passed as an
// empty string. Text between matches is copied to the result unchanged.
func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string {
	var result strings.Builder
	lastIndex := 0
	for _, loc := range re.FindAllStringSubmatchIndex(str, -1) {
		// loc holds (start, end) index pairs: the whole match first, then one pair per capture group
		groups := make([]string, 0, len(loc)/2)
		for i := 0; i < len(loc); i += 2 {
			if loc[i] < 0 {
				// optional group that did not match is reported with negative indexes
				groups = append(groups, "")
				continue
			}
			groups = append(groups, str[loc[i]:loc[i+1]])
		}
		result.WriteString(str[lastIndex:loc[0]])
		result.WriteString(repl(groups))
		lastIndex = loc[1]
	}
	result.WriteString(str[lastIndex:])
	return result.String()
}
// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
match := regEx.FindStringSubmatch(str)

View File

@ -16,9 +16,9 @@ func TestParseGemspec(t *testing.T) {
Licenses: []string{"MIT"},
Language: pkg.Ruby,
Metadata: pkg.GemMetadata{
Name: "bundler",
Version: "2.1.4",
Files: []string{"exe/bundle", "exe/bundler"},
Name: "bundler",
Version: "2.1.4",
Files: []string{"exe/bundle", "exe/bundler"},
Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"},
Licenses: []string{"MIT"},
},