diff --git a/syft/cataloger/bundler/parse_gemspec.go b/syft/cataloger/bundler/parse_gemspec.go index 00a3f5c70..11fa6c059 100644 --- a/syft/cataloger/bundler/parse_gemspec.go +++ b/syft/cataloger/bundler/parse_gemspec.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "regexp" + "strconv" "strings" "github.com/mitchellh/mapstructure" @@ -62,18 +63,15 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { for scanner.Scan() { line := scanner.Text() - // TODO: sanitize unicode? (see engine code) sanitizedLine := strings.TrimSpace(line) sanitizedLine = strings.ReplaceAll(sanitizedLine, ".freeze", "") + sanitizedLine = renderUtf8(sanitizedLine) if sanitizedLine == "" { continue } for field, pattern := range patterns { - if strings.Contains(sanitizedLine, "licenses") { - println("Found it.") - } matchMap := matchCaptureGroups(pattern, sanitizedLine) if value := matchMap[field]; value != "" { if postProcessor := postProcessors[field]; postProcessor != nil { @@ -106,6 +104,49 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) { return pkgs, nil } +// renderUtf8 takes any string escaped string sub-sections from the ruby string and replaces those sections with the UTF8 runes. +func renderUtf8(s string) string { + pattern := regexp.MustCompile(`\\u(?P[0-9A-F]{4,8})`) + fullReplacement := replaceAllStringSubmatchFunc(pattern, s, func(unicodeSection []string) string { + replacement := "" + if len(unicodeSection) == 1 { + return unicodeSection[0] + } + for idx, m := range unicodeSection { + if idx == 0 { + continue + } + value, err := strconv.ParseInt(m, 16, 64) + if err != nil { + // TODO: log? + panic(err) + //return unicodeSection[0] + } + replacement = strings.ReplaceAll(unicodeSection[0], "\\u"+m, string(rune(value))) + } + return replacement + }) + return fullReplacement +} + +// replaceAllStringSubmatchFunc finds and replaces the given capture groups from the +func replaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { + result := "" + lastIndex := 0 + + for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { + var groups []string + for i := 0; i < len(v); i += 2 { + groups = append(groups, str[v[i]:v[i+1]]) + } + + result += str[lastIndex:v[0]] + repl(groups) + lastIndex = v[1] + } + + return result + str[lastIndex:] +} + // matchCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. func matchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string { match := regEx.FindStringSubmatch(str) diff --git a/syft/cataloger/bundler/parse_gemspec_test.go b/syft/cataloger/bundler/parse_gemspec_test.go index 11d1276f1..539372b53 100644 --- a/syft/cataloger/bundler/parse_gemspec_test.go +++ b/syft/cataloger/bundler/parse_gemspec_test.go @@ -16,9 +16,9 @@ func TestParseGemspec(t *testing.T) { Licenses: []string{"MIT"}, Language: pkg.Ruby, Metadata: pkg.GemMetadata{ - Name: "bundler", - Version: "2.1.4", - Files: []string{"exe/bundle", "exe/bundler"}, + Name: "bundler", + Version: "2.1.4", + Files: []string{"exe/bundle", "exe/bundler"}, Authors: []string{"André Arko", "Samuel Giddins", "Colby Swandale", "Hiroshi Shibata", "David Rodréguez", "Grey Baker", "Stephanie Morillo", "Chris Morris", "James Wen", "Tim Moore", "André Medeiros", "Jessica Lynn Suttles", "Terence Lee", "Carl Lerche", "Yehuda Katz"}, Licenses: []string{"MIT"}, },