From 93a7d2ee274834ce64cd2d4155b1aac84a549a74 Mon Sep 17 00:00:00 2001 From: William Murphy Date: Wed, 1 May 2024 12:58:17 -0400 Subject: [PATCH] fix: better clean up of file handles (#2823) * always close ELF cataloger file handles The elf-binary-package-cataloger does its own file IO to account for the possibility of a logical ELF package being broken across multiple physical files. However, this casued it to skip the normal invocation pattern in the generic cataloger code that prevented file leaks. Ensure this cataloger always closes its file handles. Signed-off-by: Will Murphy * defer closing of generic cataloger file handles Otherwise, a panicking cataloger could leak file handles. Signed-off-by: Will Murphy * add unit test for file closed on panic parser Signed-off-by: Will Murphy * make invoke parser a static function Signed-off-by: Will Murphy * push error logging down into invoke parser Signed-off-by: Will Murphy --------- Signed-off-by: Will Murphy --- .../cataloger/binary/elf_package_cataloger.go | 54 ++++++---- syft/pkg/cataloger/generic/cataloger.go | 30 ++++-- syft/pkg/cataloger/generic/cataloger_test.go | 101 ++++++++++++++++++ 3 files changed, 155 insertions(+), 30 deletions(-) diff --git a/syft/pkg/cataloger/binary/elf_package_cataloger.go b/syft/pkg/cataloger/binary/elf_package_cataloger.go index 94f656400..9dea999e6 100644 --- a/syft/pkg/cataloger/binary/elf_package_cataloger.go +++ b/syft/pkg/cataloger/binary/elf_package_cataloger.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" + "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/mimetype" "github.com/anchore/syft/syft/artifact" @@ -57,31 +58,13 @@ func (c *elfPackageCataloger) Catalog(_ context.Context, resolver file.Resolver) // first find all ELF binaries that have notes var notesByLocation = make(map[elfPackageKey][]elfBinaryPackageNotes) for _, location := range locations { - reader, err := resolver.FileContentsByLocation(location) + notes, key, err := parseElfPackageNotes(resolver, location, c) if err != nil { - return nil, nil, fmt.Errorf("unable to get binary contents %q: %w", location.Path(), err) + return nil, nil, err } - - notes, err := c.parseElfNotes(file.LocationReadCloser{ - Location: location, - ReadCloser: reader, - }) - if err != nil { - log.WithFields("file", location.Path(), "error", err).Trace("unable to parse ELF notes") - continue - } - if notes == nil { continue } - - notes.Location = location - key := elfPackageKey{ - Name: notes.Name, - Version: notes.Version, - PURL: notes.PURL, - CPE: notes.CPE, - } notesByLocation[key] = append(notesByLocation[key], *notes) } @@ -105,6 +88,37 @@ func (c *elfPackageCataloger) Catalog(_ context.Context, resolver file.Resolver) return pkgs, nil, nil } +func parseElfPackageNotes(resolver file.Resolver, location file.Location, c *elfPackageCataloger) (*elfBinaryPackageNotes, elfPackageKey, error) { + reader, err := resolver.FileContentsByLocation(location) + if err != nil { + return nil, elfPackageKey{}, fmt.Errorf("unable to get binary contents %q: %w", location.Path(), err) + } + defer internal.CloseAndLogError(reader, location.AccessPath) + + notes, err := c.parseElfNotes(file.LocationReadCloser{ + Location: location, + ReadCloser: reader, + }) + + if err != nil { + log.WithFields("file", location.Path(), "error", err).Trace("unable to parse ELF notes") + return nil, elfPackageKey{}, nil + } + + if notes == nil { + return nil, elfPackageKey{}, nil + } + + notes.Location = location + key := elfPackageKey{ + Name: notes.Name, + Version: notes.Version, + PURL: notes.PURL, + CPE: notes.CPE, + } + return notes, key, nil +} + func (c *elfPackageCataloger) parseElfNotes(reader file.LocationReadCloser) (*elfBinaryPackageNotes, error) { metadata, err := getELFNotes(reader) if err != nil { diff --git a/syft/pkg/cataloger/generic/cataloger.go b/syft/pkg/cataloger/generic/cataloger.go index 51b1cd337..49052ec88 100644 --- a/syft/pkg/cataloger/generic/cataloger.go +++ b/syft/pkg/cataloger/generic/cataloger.go @@ -3,6 +3,7 @@ package generic import ( "context" + "github.com/anchore/go-logger" "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" @@ -122,17 +123,9 @@ func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg. log.WithFields("path", location.RealPath).Trace("parsing file contents") - contentReader, err := resolver.FileContentsByLocation(location) + discoveredPackages, discoveredRelationships, err := invokeParser(ctx, resolver, location, logger, parser, &env) if err != nil { - logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents") - continue - } - - discoveredPackages, discoveredRelationships, err := parser(ctx, resolver, &env, file.NewLocationReadCloser(location, contentReader)) - internal.CloseAndLogError(contentReader, location.AccessPath) - if err != nil { - logger.WithFields("location", location.RealPath, "error", err).Warnf("cataloger failed") - continue + continue // logging is handled within invokeParser } for _, p := range discoveredPackages { @@ -145,6 +138,23 @@ func (c *Cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg. return packages, relationships, nil } +func invokeParser(ctx context.Context, resolver file.Resolver, location file.Location, logger logger.Logger, parser Parser, env *Environment) ([]pkg.Package, []artifact.Relationship, error) { + contentReader, err := resolver.FileContentsByLocation(location) + if err != nil { + logger.WithFields("location", location.RealPath, "error", err).Warn("unable to fetch contents") + return nil, nil, err + } + defer internal.CloseAndLogError(contentReader, location.AccessPath) + + discoveredPackages, discoveredRelationships, err := parser(ctx, resolver, env, file.NewLocationReadCloser(location, contentReader)) + if err != nil { + logger.WithFields("location", location.RealPath, "error", err).Warnf("cataloger failed") + return nil, nil, err + } + + return discoveredPackages, discoveredRelationships, nil +} + // selectFiles takes a set of file trees and resolves and file references of interest for future cataloging func (c *Cataloger) selectFiles(resolver file.Resolver) []request { var requests []request diff --git a/syft/pkg/cataloger/generic/cataloger_test.go b/syft/pkg/cataloger/generic/cataloger_test.go index 43d0dc801..c22113432 100644 --- a/syft/pkg/cataloger/generic/cataloger_test.go +++ b/syft/pkg/cataloger/generic/cataloger_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -86,3 +87,103 @@ func Test_Cataloger(t *testing.T) { } } } + +type spyReturningFileResolver struct { + m *file.MockResolver + s *spyingIoReadCloser +} + +type spyingIoReadCloser struct { + rc io.ReadCloser + closed bool +} + +func newSpyReturningFileResolver(s *spyingIoReadCloser, paths ...string) file.Resolver { + m := file.NewMockResolverForPaths(paths...) + return spyReturningFileResolver{ + m: m, + s: s, + } +} + +func (s *spyingIoReadCloser) Read(p []byte) (n int, err error) { + return s.Read(p) +} + +func (s *spyingIoReadCloser) Close() error { + s.closed = true + return s.rc.Close() +} + +var _ io.ReadCloser = (*spyingIoReadCloser)(nil) + +func (m spyReturningFileResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) { + return m.s, nil +} + +func (m spyReturningFileResolver) HasPath(path string) bool { + return m.m.HasPath(path) +} + +func (m spyReturningFileResolver) FilesByPath(paths ...string) ([]file.Location, error) { + return m.m.FilesByPath(paths...) +} + +func (m spyReturningFileResolver) FilesByGlob(patterns ...string) ([]file.Location, error) { + return m.m.FilesByGlob(patterns...) +} + +func (m spyReturningFileResolver) FilesByMIMEType(types ...string) ([]file.Location, error) { + return m.m.FilesByMIMEType(types...) +} + +func (m spyReturningFileResolver) RelativeFileByPath(f file.Location, path string) *file.Location { + return m.m.RelativeFileByPath(f, path) +} + +func (m spyReturningFileResolver) AllLocations(ctx context.Context) <-chan file.Location { + return m.m.AllLocations(ctx) +} + +func (m spyReturningFileResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) { + return m.m.FileMetadataByLocation(location) +} + +var _ file.Resolver = (*spyReturningFileResolver)(nil) + +func TestClosesFileOnParserPanic(t *testing.T) { + rc := io.NopCloser(strings.NewReader("some string")) + spy := spyingIoReadCloser{ + rc: rc, + } + resolver := newSpyReturningFileResolver(&spy, "test-fixtures/another-path.txt") + ctx := context.TODO() + + processors := []processor{ + func(resolver file.Resolver, env Environment) []request { + return []request{ + { + Location: file.Location{ + LocationData: file.LocationData{ + Coordinates: file.Coordinates{}, + AccessPath: "/some/access/path", + }, + }, + Parser: func(context.Context, file.Resolver, *Environment, file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + panic("panic!") + }, + }, + } + }, + } + + c := Cataloger{ + processor: processors, + upstreamCataloger: "unit-test-cataloger", + } + + assert.PanicsWithValue(t, "panic!", func() { + _, _, _ = c.Catalog(ctx, resolver) + }) + require.True(t, spy.closed) +}