diff --git a/internal/regex_helpers.go b/internal/regex_helpers.go index 7130f21a8..78a024c4b 100644 --- a/internal/regex_helpers.go +++ b/internal/regex_helpers.go @@ -1,6 +1,11 @@ package internal -import "regexp" +import ( + "io" + "regexp" +) + +const readerChunkSize = 1024 * 1024 // MatchNamedCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. // This is only for the first match in the regex. Callers shouldn't be providing regexes with multiple capture groups with the same name. @@ -32,6 +37,89 @@ func MatchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]st return results } +// MatchNamedCaptureGroupsFromReader matches named capture groups from a reader, assuming the pattern fits within +// 1.5x the reader chunk size (1MB * 1.5). +func MatchNamedCaptureGroupsFromReader(re *regexp.Regexp, r io.Reader) (map[string]string, error) { + results := make(map[string]string) + _, err := processReaderInChunks(r, readerChunkSize, matchNamedCaptureGroupsHandler(re, results)) + if err != nil { + return nil, err + } + if len(results) == 0 { + return nil, nil + } + return results, nil +} + +// MatchAnyFromReader matches any of the provided regular expressions from a reader, assuming the pattern fits within +// 1.5x the reader chunk size (1MB * 1.5). 
+func MatchAnyFromReader(r io.Reader, res ...*regexp.Regexp) (bool, error) { + return processReaderInChunks(r, readerChunkSize, matchAnyHandler(res)) +} + +func matchNamedCaptureGroupsHandler(re *regexp.Regexp, results map[string]string) func(data []byte) (bool, error) { + return func(data []byte) (bool, error) { + if match := re.FindSubmatch(data); match != nil { + groupNames := re.SubexpNames() + for i, name := range groupNames { + if i > 0 && name != "" { + results[name] = string(match[i]) + } + } + return true, nil + } + return false, nil + } +} + +func matchAnyHandler(res []*regexp.Regexp) func(data []byte) (bool, error) { + return func(data []byte) (bool, error) { + for _, re := range res { + if re.Match(data) { + return true, nil + } + } + return false, nil + } +} + +// processReaderInChunks reads from the provided reader in chunks and calls the provided handler with each chunk + portion of the previous neighboring chunk. +// Note that we only overlap the last half of the previous chunk with the current chunk to avoid missing matches that span chunk boundaries. 
// The handler is called with at most chunkSize+chunkSize/2 bytes at a time: up to chunkSize/2 bytes carried over
// from the end of the previous chunk followed by up to chunkSize freshly read bytes. Every chunk except the last
// is filled completely (short reads are retried), so the carried-over bytes are always the true tail of the
// previous chunk. Bytes that the reader returns together with an error (including io.EOF) are still passed to
// the handler.
//
// It returns true as soon as the handler reports a match and false when the reader is exhausted without one.
// Handler errors and read errors other than io.EOF are returned to the caller; io.ErrShortBuffer is returned
// when chunkSize is not positive.
func processReaderInChunks(rdr io.Reader, chunkSize int, handler func(data []byte) (bool, error)) (bool, error) {
	if chunkSize < 1 {
		// a zero-length window can never make progress and a negative one cannot be allocated
		return false, io.ErrShortBuffer
	}

	half := chunkSize / 2
	// buffer layout: [ overlap: half bytes | current chunk: chunkSize bytes ]
	buf := make([]byte, half+chunkSize)
	lastRead := 0

	for {
		// carry the last (at most half) bytes of the previous chunk to just before the new chunk
		overlap := half
		if lastRead < half {
			overlap = lastRead
		}
		start := half - overlap
		copy(buf[start:half], buf[half+lastRead-overlap:half+lastRead])

		// fill the chunk completely: an io.Reader may legally return fewer bytes than requested
		n := 0
		var readErr error
		for n < chunkSize && readErr == nil {
			var m int
			m, readErr = rdr.Read(buf[half+n:])
			n += m
		}

		// bytes delivered alongside an error are valid data and must not be dropped
		if n > 0 {
			matched, err := handler(buf[start : half+n])
			if err != nil {
				return false, err
			}
			if matched {
				return true, nil
			}
		}

		// io.Reader implementations return io.EOF itself (never wrapped), so a direct comparison is correct
		if readErr == io.EOF {
			return false, nil
		}
		if readErr != nil {
			return false, readErr
		}

		lastRead = n
	}
}
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + re := regexp.MustCompile(tt.pattern) + r := strings.NewReader(tt.input) + got, err := MatchNamedCaptureGroupsFromReader(re, r) + tt.wantErr(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestMatchAnyFromReader(t *testing.T) { + tests := []struct { + name string + input string + patterns []*regexp.Regexp + want bool + wantErr require.ErrorAssertionFunc + }{ + { + name: "match single pattern", + input: "hello world", + patterns: []*regexp.Regexp{regexp.MustCompile(`hello`)}, + want: true, + wantErr: require.NoError, + }, + { + name: "match multiple patterns", + input: "test case", + patterns: []*regexp.Regexp{regexp.MustCompile(`case`), regexp.MustCompile(`test`)}, + want: true, + wantErr: require.NoError, + }, + { + name: "no match", + input: "nothing here", + patterns: []*regexp.Regexp{regexp.MustCompile(`absent`)}, + want: false, + wantErr: require.NoError, + }, + { + name: "error empty reader", + input: "", + patterns: []*regexp.Regexp{regexp.MustCompile(`match`)}, + want: false, + wantErr: require.NoError, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := strings.NewReader(tt.input) + got, err := MatchAnyFromReader(r, tt.patterns...) 
+ tt.wantErr(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestProcessReaderInChunks_ChunkBoundaries(t *testing.T) { + tests := []struct { + name string + input string + chunkSize int + expectedCalls []string + returnOnChunk int + wantErr require.ErrorAssertionFunc + }{ + { + name: "go case", + input: "123456789012345", + chunkSize: 4, + returnOnChunk: 2, + expectedCalls: []string{"1234", "345678", "789012"}, + wantErr: require.NoError, + }, + { + name: "no match", + input: "123456789012345", + chunkSize: 4, + returnOnChunk: -1, + expectedCalls: []string{"1234", "345678", "789012", "12345"}, + wantErr: require.NoError, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var actualCalls []string + var current int + handler := func(data []byte) (bool, error) { + actualCalls = append(actualCalls, string(data)) + if current == tt.returnOnChunk { + return true, nil + } + current++ + return false, nil + } + r := strings.NewReader(tt.input) + got, err := processReaderInChunks(r, tt.chunkSize, handler) + tt.wantErr(t, err) + if tt.returnOnChunk == -1 { + assert.False(t, got) + } else { + assert.True(t, got) + } + assert.Equal(t, tt.expectedCalls, actualCalls) + }) + } +} diff --git a/syft/pkg/cataloger/binary/classifier.go b/syft/pkg/cataloger/binary/classifier.go index 490df82b3..523645b32 100644 --- a/syft/pkg/cataloger/binary/classifier.go +++ b/syft/pkg/cataloger/binary/classifier.go @@ -18,6 +18,7 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/unionreader" "github.com/anchore/syft/syft/pkg" ) @@ -74,9 +75,9 @@ func (cfg Classifier) MarshalJSON() ([]byte, error) { type EvidenceMatcher func(classifier Classifier, context matcherContext) ([]pkg.Package, error) type matcherContext struct { - resolver file.Resolver - location file.Location - getContents func(resolver matcherContext) ([]byte, error) + resolver 
file.Resolver + location file.Location + getReader func(resolver matcherContext) (unionreader.UnionReader, error) } func evidenceMatchers(matchers ...EvidenceMatcher) EvidenceMatcher { @@ -124,12 +125,15 @@ func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate stri return nil, fmt.Errorf("unable to compile rendered regex=%q: %w", patternBuf.String(), err) } - contents, err := getContents(context) + contents, err := getReader(context) if err != nil { return nil, fmt.Errorf("unable to get read contents for file: %w", err) } - matchMetadata := internal.MatchNamedCaptureGroups(tmplPattern, string(contents)) + matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(tmplPattern, contents) + if err != nil { + return nil, fmt.Errorf("unable to match version: %w", err) + } p := newClassifierPackage(classifier, context.location, matchMetadata) if p == nil { @@ -143,12 +147,15 @@ func fileNameTemplateVersionMatcher(fileNamePattern string, contentTemplate stri func FileContentsVersionMatcher(pattern string) EvidenceMatcher { pat := regexp.MustCompile(pattern) return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) { - contents, err := getContents(context) + contents, err := getReader(context) if err != nil { return nil, fmt.Errorf("unable to get read contents for file: %w", err) } - matchMetadata := internal.MatchNamedCaptureGroups(pat, string(contents)) + matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(pat, contents) + if err != nil { + return nil, fmt.Errorf("unable to match version: %w", err) + } // Convert {major: 1, minor: 2, patch: 3} to "1.2.3" _, versionOk := matchMetadata["version"] @@ -183,14 +190,16 @@ func matchExcluding(matcher EvidenceMatcher, contentPatternsToExclude ...string) nonMatchPatterns = append(nonMatchPatterns, regexp.MustCompile(p)) } return func(classifier Classifier, context matcherContext) ([]pkg.Package, error) { - contents, err := getContents(context) + contents, err := 
getReader(context) if err != nil { return nil, fmt.Errorf("unable to get read contents for file: %w", err) } - for _, nonMatch := range nonMatchPatterns { - if nonMatch.Match(contents) { - return nil, nil - } + matches, err := internal.MatchAnyFromReader(contents, nonMatchPatterns...) + if err != nil { + return nil, fmt.Errorf("unable to match content: %w", err) + } + if matches { + return nil, nil } return matcher(classifier, context) } @@ -214,9 +223,9 @@ func sharedLibraryLookup(sharedLibraryPattern string, sharedLibraryMatcher Evide } for _, libraryLocation := range locations { newResolver := matcherContext{ - resolver: context.resolver, - location: libraryLocation, - getContents: context.getContents, + resolver: context.resolver, + location: libraryLocation, + getReader: context.getReader, } newResolver.location = libraryLocation pkgs, err := sharedLibraryMatcher(classifier, newResolver) @@ -253,23 +262,16 @@ func mustPURL(purl string) packageurl.PackageURL { return p } -func getContents(context matcherContext) ([]byte, error) { - if context.getContents != nil { - return context.getContents(context) +func getReader(context matcherContext) (unionreader.UnionReader, error) { + if context.getReader != nil { + return context.getReader(context) } - reader, err := context.resolver.FileContentsByLocation(context.location) + reader, err := context.resolver.FileContentsByLocation(context.location) //nolint:gocritic if err != nil { return nil, err } - defer internal.CloseAndLogError(reader, context.location.AccessPath) - // TODO: there may be room for improvement here, as this may use an excessive amount of memory. Alternate approach is to leverage a RuneReader. 
- contents, err := io.ReadAll(reader) - if err != nil { - return nil, fmt.Errorf("unable to get contents for file: %w", err) - } - - return contents, nil + return unionreader.GetUnionReader(reader) } // singleCPE returns a []cpe.CPE with Source: Generated based on the cpe string or panics if the @@ -287,14 +289,13 @@ func singleCPE(cpeString string, source ...cpe.Source) []cpe.CPE { // sharedLibraries returns a list of all shared libraries found within a binary, currently // supporting: elf, macho, and windows pe func sharedLibraries(context matcherContext) ([]string, error) { - contents, err := getContents(context) + contents, err := getReader(context) if err != nil { return nil, err } + defer internal.CloseAndLogError(contents, context.location.RealPath) - r := bytes.NewReader(contents) - - e, _ := elf.NewFile(r) + e, _ := elf.NewFile(contents) if e != nil { symbols, err := e.ImportedLibraries() if err != nil { @@ -302,8 +303,11 @@ func sharedLibraries(context matcherContext) ([]string, error) { } return symbols, nil } + if _, err := contents.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("unable to seek to beginning of file: %w", err) + } - m, _ := macho.NewFile(r) + m, _ := macho.NewFile(contents) if m != nil { symbols, err := m.ImportedLibraries() if err != nil { @@ -311,8 +315,11 @@ func sharedLibraries(context matcherContext) ([]string, error) { } return symbols, nil } + if _, err := contents.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("unable to seek to beginning of file: %w", err) + } - p, _ := pe.NewFile(r) + p, _ := pe.NewFile(contents) if p != nil { symbols, err := p.ImportedLibraries() if err != nil { @@ -320,6 +327,9 @@ func sharedLibraries(context matcherContext) ([]string, error) { } return symbols, nil } + if _, err := contents.Seek(0, io.SeekStart); err != nil { + return nil, fmt.Errorf("unable to seek to beginning of file: %w", err) + } return nil, nil } diff --git a/syft/pkg/cataloger/binary/classifier_test.go 
b/syft/pkg/cataloger/binary/classifier_test.go index a773bf6bd..445b5f0a0 100644 --- a/syft/pkg/cataloger/binary/classifier_test.go +++ b/syft/pkg/cataloger/binary/classifier_test.go @@ -1,6 +1,8 @@ package binary import ( + "bytes" + "io" "testing" "github.com/stretchr/testify/assert" @@ -9,6 +11,7 @@ import ( "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/unionreader" ) func Test_ClassifierCPEs(t *testing.T) { @@ -162,12 +165,12 @@ func TestFileContentsVersionMatcher(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - mockGetContent := func(context matcherContext) ([]byte, error) { - return []byte(tt.data), nil + mockGetContent := func(context matcherContext) (unionreader.UnionReader, error) { + return unionreader.GetUnionReader(io.NopCloser(bytes.NewBufferString(tt.data))) } fn := FileContentsVersionMatcher(tt.pattern) p, err := fn(Classifier{}, matcherContext{ - getContents: mockGetContent, + getReader: mockGetContent, }) if err != nil { diff --git a/syft/pkg/cataloger/binary/classifiers.go b/syft/pkg/cataloger/binary/classifiers.go index 2eb52408f..9bb623731 100644 --- a/syft/pkg/cataloger/binary/classifiers.go +++ b/syft/pkg/cataloger/binary/classifiers.go @@ -77,8 +77,12 @@ func DefaultClassifiers() []Classifier { Class: "redis-binary", FileGlob: "**/redis-server", EvidenceMatcher: evidenceMatchers( - FileContentsVersionMatcher(`(?s)payload %5.*?(?P\d.\d\.\d\d*)[a-z0-9]{12,15}-[0-9]{19}`), - FileContentsVersionMatcher(`(?s)\x00(?P\d.\d\.\d\d*)[a-z0-9]{12,15}-[0-9]{19}\x00.*?payload %5`), + // matches most recent versions of redis (~v7), e.g. "7.0.14buildkitsandbox-1702957741000000000" + FileContentsVersionMatcher(`[^\d](?P\d+.\d+\.\d+)buildkitsandbox-\d+`), + // matches against older versions of redis (~v3 - v6), e.g. 
"4.0.11841ce7054bd9-1542359302000000000" + FileContentsVersionMatcher(`[^\d](?P[0-9]+\.[0-9]+\.[0-9]+)\w{12}-\d+`), + // matches against older versions of redis (~v2), e.g. "Server started, Redis version 2.8.23" + FileContentsVersionMatcher(`Redis version (?P[0-9]+\.[0-9]+\.[0-9]+)`), ), Package: "redis", PURL: mustPURL("pkg:generic/redis@version"), diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/2.8.23/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/2.8.23/linux-amd64/redis-server new file mode 100644 index 000000000..7169bc6b7 Binary files /dev/null and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/2.8.23/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/4.0.11/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/4.0.11/linux-amd64/redis-server new file mode 100644 index 000000000..c43f5f381 Binary files /dev/null and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/4.0.11/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/5.0.0/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/5.0.0/linux-amd64/redis-server index d301ebb18..522336dee 100644 Binary files a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/5.0.0/linux-amd64/redis-server and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/5.0.0/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/6.0.16/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/6.0.16/linux-amd64/redis-server index a0d038ecd..4dd832cc9 100644 Binary files 
a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/6.0.16/linux-amd64/redis-server and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/6.0.16/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.0/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.0/linux-amd64/redis-server index 8d2a3e7d3..2dbea6982 100644 Binary files a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.0/linux-amd64/redis-server and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.0/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.14/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.14/linux-amd64/redis-server index 7675053c1..631acb086 100644 Binary files a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.14/linux-amd64/redis-server and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.0.14/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-amd64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-amd64/redis-server new file mode 100644 index 000000000..38cead012 Binary files /dev/null and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-amd64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-arm64/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-arm64/redis-server new file mode 100644 index 000000000..aea46692a Binary files /dev/null and 
b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.3/linux-arm64/redis-server differ diff --git a/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.5/linux-unknown-454d5f333836/redis-server b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.5/linux-unknown-454d5f333836/redis-server new file mode 100644 index 000000000..4b9386b79 Binary files /dev/null and b/syft/pkg/cataloger/binary/test-fixtures/classifiers/snippets/redis-server/7.2.5/linux-unknown-454d5f333836/redis-server differ diff --git a/syft/pkg/cataloger/golang/parse_go_binary.go b/syft/pkg/cataloger/golang/parse_go_binary.go index b0fe56a4e..187649565 100644 --- a/syft/pkg/cataloger/golang/parse_go_binary.go +++ b/syft/pkg/cataloger/golang/parse_go_binary.go @@ -262,12 +262,11 @@ func (c *goBinaryCataloger) findMainModuleVersion(metadata *pkg.GolangBinaryBuil } func extractVersionFromContents(reader io.Reader) string { - contents, err := io.ReadAll(reader) + matchMetadata, err := internal.MatchNamedCaptureGroupsFromReader(semverPattern, reader) if err != nil { - log.WithFields("error", err).Trace("unable to read from go binary reader") + log.WithFields("error", err).Trace("unable to extract version from go binary reader") return "" } - matchMetadata := internal.MatchNamedCaptureGroups(semverPattern, string(contents)) version, ok := matchMetadata["version"] if ok {