diff --git a/internal/file/opener.go b/internal/file/opener.go new file mode 100644 index 000000000..c79e2a845 --- /dev/null +++ b/internal/file/opener.go @@ -0,0 +1,14 @@ +package file + +import ( + "io" + "os" +) + +type Opener struct { + path string +} + +func (o Opener) Open() (io.ReadCloser, error) { + return os.Open(o.path) +} diff --git a/internal/file/zip_file_traversal.go b/internal/file/zip_file_traversal.go index 46f103ad0..0f8fe149e 100644 --- a/internal/file/zip_file_traversal.go +++ b/internal/file/zip_file_traversal.go @@ -63,8 +63,8 @@ func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths return nil } -func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]io.Reader, error) { - results := make(map[string]io.Reader) +func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { + results := make(map[string]Opener) // don't allow for full traversal, only select traversal from given paths if len(paths) == 0 { @@ -78,11 +78,21 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m if err != nil { return fmt.Errorf("unable to create temp file: %w", err) } + // we shouldn't try to keep the tempfile open, as the returned result may have several files and every open handle takes up + // resources (leading to "too many open files"). Instead we return a file opener (by path) to the caller, which + // provides a ReadCloser on demand. It is up to the caller to explicitly close whatever the opener returns. 
+ defer tempFile.Close() zippedFile, err := file.Open() if err != nil { return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) } + defer func() { + err := zippedFile.Close() + if err != nil { + log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err) + } + }() if file.FileInfo().IsDir() { return fmt.Errorf("unable to extract directories, only files: %s", file.Name) @@ -103,12 +113,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err) } - results[file.Name] = tempFile + results[file.Name] = Opener{path: tempFile.Name()} - err = zippedFile.Close() - if err != nil { - return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err) - } return nil } diff --git a/syft/cataloger/java/archive_parser.go b/syft/cataloger/java/archive_parser.go index adb699d8c..8681ffe57 100644 --- a/syft/cataloger/java/archive_parser.go +++ b/syft/cataloger/java/archive_parser.go @@ -5,6 +5,8 @@ import ( "io" "strings" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/file" "github.com/anchore/syft/syft/cataloger/common" @@ -214,18 +216,28 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ( } // search and parse pom.properties files & fetch the contents - readers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...) + openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...) 
 if err != nil { return nil, fmt.Errorf("unable to extract files from zip: %w", err) } // discover nested artifacts - for archivePath, archiveReader := range readers { - nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath) - nestedPkgs, err := parseJavaArchive(nestedPath, archiveReader) + for archivePath, archiveOpener := range openers { + archiveReadCloser, err := archiveOpener.Open() if err != nil { + return nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) + } + nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath) + nestedPkgs, err := parseJavaArchive(nestedPath, archiveReadCloser) + if err != nil { + if closeErr := archiveReadCloser.Close(); closeErr != nil { + log.Errorf("unable to close archived file from tempdir: %+v", closeErr) + } return nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err) } + if err = archiveReadCloser.Close(); err != nil { + return nil, fmt.Errorf("unable to close archived file from tempdir: %w", err) + } // attach the parent package to all discovered packages that are not already associated with a java archive for _, p := range nestedPkgs { diff --git a/syft/cataloger/java/save_archive_to_tmp.go b/syft/cataloger/java/save_archive_to_tmp.go index 699bdf84c..69d786693 100644 --- a/syft/cataloger/java/save_archive_to_tmp.go +++ b/syft/cataloger/java/save_archive_to_tmp.go @@ -35,6 +35,7 @@ func saveArchiveToTmp(reader io.Reader) (string, string, func(), error) { if err != nil { return contentDir, "", cleanupFn, fmt.Errorf("unable to create archive: %w", err) } + defer archiveFile.Close() _, err = io.Copy(archiveFile, reader) if err != nil {