mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
add tests around MatchNamedCaptureGroups + rename
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
66ebe49a04
commit
5743e32e02
@ -1,15 +0,0 @@
|
|||||||
package internal
|
|
||||||
|
|
||||||
import "regexp"
|
|
||||||
|
|
||||||
// MatchCaptureGroups takes a regular expression and string and returns the capture group results of the first
// (leftmost) match in a map keyed by capture group name.
//
// The whole-pattern match (index 0) is never included. Unnamed capture groups are reported by the regexp package
// with an empty name, so they are stored under the "" key (a later unnamed group overwrites an earlier one).
// When the pattern does not match, an empty (non-nil) map is returned.
func MatchCaptureGroups(regEx *regexp.Regexp, str string) map[string]string {
	results := make(map[string]string)

	match := regEx.FindStringSubmatch(str)
	if match == nil {
		// no match at all: nothing to report
		return results
	}

	for i, name := range regEx.SubexpNames() {
		// skip index 0 (the entire match) and never index past the end of the submatch slice
		if i == 0 || i >= len(match) {
			continue
		}
		results[name] = match[i]
	}
	return results
}
|
|
||||||
39
internal/regex_helpers.go
Normal file
39
internal/regex_helpers.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import "regexp"
|
||||||
|
|
||||||
|
// MatchNamedCaptureGroups takes a regular expression and string and returns all of the named capture group results
// in a map keyed by group name. Unnamed capture groups are never included.
//
// Note: only a single match contributes to the result: the first match (scanning left to right) in which at least
// one named group captured a non-empty value. If no match has a non-empty named capture, the values from the last
// match are returned (every value is then the empty string). Nil is returned when the pattern does not match at all
// or when the pattern has no named capture groups.
func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string {
	// note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example:
	// input: "cool something to match against" pattern: `((?P<name>match) (?P<version>against))?`. Since the pattern is
	// encapsulated in an optional capture group, there will be results for each character, but the results will match
	// on nothing. The only "true" match will be at the end ("match against").
	allMatches := regEx.FindAllStringSubmatch(content, -1)

	// the group names are a property of the pattern, not of a match, so fetch them once
	names := regEx.SubexpNames()

	for matchIdx, match := range allMatches {
		// fill a candidate results map with named capture group results, accepting empty values, but not groups with
		// no names
		results := make(map[string]string)
		for nameIdx, name := range names {
			if name == "" || nameIdx >= len(match) {
				continue
			}
			results[name] = match[nameIdx]
		}

		if len(results) == 0 {
			// nothing was named in this pattern, so there is no candidate to consider for this match
			continue
		}

		// note: since we are looking for the first best potential match we should stop when we find the first one
		// with non-empty results.
		foundNonEmptyValue := false
		for _, value := range results {
			if value != "" {
				foundNonEmptyValue = true
				break
			}
		}

		// return the first non-empty result, or if this is the last match, the results that were found.
		if foundNonEmptyValue || matchIdx == len(allMatches)-1 {
			return results
		}
	}
	return nil
}
|
||||||
70
internal/regex_helpers_test.go
Normal file
70
internal/regex_helpers_test.go
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMatchCaptureGroups(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
pattern string
|
||||||
|
expected map[string]string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "go-case",
|
||||||
|
input: "match this thing",
|
||||||
|
pattern: `(?P<name>match).*(?P<version>thing)`,
|
||||||
|
expected: map[string]string{
|
||||||
|
"name": "match",
|
||||||
|
"version": "thing",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only matches the first instance",
|
||||||
|
input: "match this thing batch another think",
|
||||||
|
pattern: `(?P<name>[mb]atch).*?(?P<version>thin[gk])`,
|
||||||
|
expected: map[string]string{
|
||||||
|
"name": "match",
|
||||||
|
"version": "thing",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested capture groups",
|
||||||
|
input: "cool something to match against",
|
||||||
|
pattern: `((?P<name>match) (?P<version>against))`,
|
||||||
|
expected: map[string]string{
|
||||||
|
"name": "match",
|
||||||
|
"version": "against",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested optional capture groups",
|
||||||
|
input: "cool something to match against",
|
||||||
|
pattern: `((?P<name>match) (?P<version>against))?`,
|
||||||
|
expected: map[string]string{
|
||||||
|
"name": "match",
|
||||||
|
"version": "against",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nested optional capture groups with larger match",
|
||||||
|
input: "cool something to match against match never",
|
||||||
|
pattern: `.*?((?P<name>match) (?P<version>(against|never)))?`,
|
||||||
|
expected: map[string]string{
|
||||||
|
"name": "match",
|
||||||
|
"version": "against",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
actual := MatchNamedCaptureGroups(regexp.MustCompile(test.pattern), test.input)
|
||||||
|
assert.Equal(t, test.expected, actual)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -21,7 +21,7 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
|
|||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
|
|
||||||
matchesByGroup := internal.MatchCaptureGroups(licensePattern, line)
|
matchesByGroup := internal.MatchNamedCaptureGroups(licensePattern, line)
|
||||||
if len(matchesByGroup) > 0 {
|
if len(matchesByGroup) > 0 {
|
||||||
candidate, ok := matchesByGroup["license"]
|
candidate, ok := matchesByGroup["license"]
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|||||||
@ -145,7 +145,7 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
|
|||||||
// of the "<name>" form, then return name and nil
|
// of the "<name>" form, then return name and nil
|
||||||
func extractSourceVersion(source string) (string, string) {
|
func extractSourceVersion(source string) (string, string) {
|
||||||
// special handling for the Source field since it has formatted data
|
// special handling for the Source field since it has formatted data
|
||||||
match := internal.MatchCaptureGroups(sourceRegexp, source)
|
match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
|
||||||
return match["name"], match["version"]
|
return match["name"], match["version"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -63,7 +63,7 @@ func (a *Author) UnmarshalJSON(b []byte) error {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// parse out "name <email> (url)" into an Author struct
|
// parse out "name <email> (url)" into an Author struct
|
||||||
fields = internal.MatchCaptureGroups(authorPattern, authorStr)
|
fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// translate the map into a structure
|
// translate the map into a structure
|
||||||
|
|||||||
@ -77,7 +77,7 @@ func parseGemSpecEntries(_ string, reader io.Reader) ([]pkg.Package, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for field, pattern := range patterns {
|
for field, pattern := range patterns {
|
||||||
matchMap := internal.MatchCaptureGroups(pattern, sanitizedLine)
|
matchMap := internal.MatchNamedCaptureGroups(pattern, sanitizedLine)
|
||||||
if value := matchMap[field]; value != "" {
|
if value := matchMap[field]; value != "" {
|
||||||
if postProcessor := postProcessors[field]; postProcessor != nil {
|
if postProcessor := postProcessors[field]; postProcessor != nil {
|
||||||
fields[field] = postProcessor(value)
|
fields[field] = postProcessor(value)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user