Render escaped unicode sequences in the gemspec parser

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-07 11:19:29 -04:00 committed by Toure Dunnon
parent 1f0f6fa3e5
commit 46c74865e5
2 changed files with 48 additions and 7 deletions

View File

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"io" "io"
"regexp" "regexp"
"strconv"
"strings" "strings"
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
@ -62,18 +63,15 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
for scanner.Scan() { for scanner.Scan() {
line := scanner.Text() line := scanner.Text()
// TODO: sanitize unicode? (see engine code)
sanitizedLine := strings.TrimSpace(line) sanitizedLine := strings.TrimSpace(line)
sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "") sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "")
sanitizedLine = renderUtf8(sanitizedLine)
if sanitizedLine == "" { if sanitizedLine == "" {
continue continue
} }
for field, pattern := range patterns { for field, pattern := range patterns {
if strings.Contains(sanitizedLine, "licenses") {
println("Found it.")
}
matchMap := matchCaptureGroups(pattern, sanitizedLine) matchMap := matchCaptureGroups(pattern, sanitizedLine)
if value := matchMap[field]; value != "" { if value := matchMap[field]; value != "" {
if postProcessor := postProcessors[field]; postProcessor != nil { if postProcessor := postProcessors[field]; postProcessor != nil {
@ -106,6 +104,49 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
return pkgs, nil return pkgs, nil
} }
// unicodeEscapePattern matches a single ruby-style `\uXXXX` escape: a backslash, a 'u', and exactly four
// uppercase hexadecimal digits. It is compiled once at package scope because renderUtf8 is invoked for every
// scanned line.
var unicodeEscapePattern = regexp.MustCompile(`\\u[0-9A-F]{4}`)

// renderUtf8 replaces every escaped unicode sub-section (e.g. `\u00E9`) found in the given ruby string with the
// UTF-8 encoding of the code point it names. Text that does not match the escape pattern is returned untouched.
//
// Exactly four hex digits are consumed per escape, so literal hex-looking characters that directly follow an
// escape (as in `\u00E9123`) are preserved rather than being folded into the code point.
func renderUtf8(s string) string {
	return unicodeEscapePattern.ReplaceAllStringFunc(s, func(escape string) string {
		// skip the leading `\u` and decode the remaining four hex digits
		codePoint, err := strconv.ParseUint(escape[2:], 16, 32)
		if err != nil {
			// the pattern only admits hex digits so this is not expected; if it ever happens keep the
			// original text instead of aborting the whole parse
			return escape
		}
		return string(rune(codePoint))
	})
}
// replaceAllStringSubmatchFunc returns a copy of str in which every match of re is replaced by the result of
// calling repl with that match's capture groups. The slice handed to repl holds the full match at index 0
// followed by one entry per capture group; a group that did not participate in the match is passed as an
// empty string. Text between matches is copied through unchanged.
func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string {
	var result strings.Builder
	lastIndex := 0

	for _, loc := range re.FindAllStringSubmatchIndex(str, -1) {
		// loc holds (start, end) index pairs: the whole match first, then each capture group
		groups := make([]string, 0, len(loc)/2)
		for i := 0; i < len(loc); i += 2 {
			if loc[i] < 0 {
				// the regexp package reports -1 for groups that did not take part in the match;
				// slicing with those indexes would panic
				groups = append(groups, "")
				continue
			}
			groups = append(groups, str[loc[i]:loc[i+1]])
		}

		result.WriteString(str[lastIndex:loc[0]])
		result.WriteString(repl(groups))
		lastIndex = loc[1]
	}

	result.WriteString(str[lastIndex:])
	return result.String()
}
// matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. // matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map.
func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string { func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
match := regEx.FindStringSubmatch(str) match := regEx.FindStringSubmatch(str)