reduce number of open files while processing nested java archives (#227)

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2020-10-16 17:22:14 -04:00 committed by GitHub
parent c0b9d7854b
commit bb14f3b45b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 11 deletions

14
internal/file/opener.go Normal file
View File

@ -0,0 +1,14 @@
package file
import (
"io"
"os"
)
// Opener is a lightweight handle to a file on disk. It stores only the file's path, so keeping
// many Openers around does not keep any files open; a file is opened only when Open is called.
type Opener struct {
	// path is the location handed to os.Open.
	path string
}

// Open opens the file at the Opener's path for reading and returns it as an io.ReadCloser.
// The caller is responsible for closing the returned ReadCloser. On failure the returned
// ReadCloser is a true nil interface value and the error is the one reported by os.Open.
func (o Opener) Open() (io.ReadCloser, error) {
	f, err := os.Open(o.path)
	if err != nil {
		// Returning the nil *os.File directly would box it into a non-nil io.ReadCloser,
		// so callers comparing the result against nil would be misled.
		return nil, err
	}
	return f, nil
}

View File

@ -63,8 +63,8 @@ func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths
return nil
}
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]io.Reader, error) {
results := make(map[string]io.Reader)
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths
if len(paths) == 0 {
@ -78,11 +78,21 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
if err != nil {
return fmt.Errorf("unable to create temp file: %w", err)
}
// we shouldn't try to keep the tempfile open: the returned result may contain several files, and holding them all
// open consumes resources (leading to "too many open files"). Instead we return a file opener to the caller, which
// provides a ReadCloser. It is up to the caller to close the file explicitly.
defer tempFile.Close()
zippedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
}
defer func() {
err := zippedFile.Close()
if err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
}
}()
if file.FileInfo().IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
@ -103,12 +113,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err)
}
results[file.Name] = tempFile
results[file.Name] = Opener{path: tempFile.Name()}
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil
}

View File

@ -5,6 +5,8 @@ import (
"io"
"strings"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/cataloger/common"
@ -214,18 +216,28 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
}
// extract the files matching the archive format globs (the nested archives) to temp files
readers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
if err != nil {
return nil, fmt.Errorf("unable to extract files from zip: %w", err)
}
// discover nested artifacts
for archivePath, archiveReader := range readers {
nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath)
nestedPkgs, err := parseJavaArchive(nestedPath, archiveReader)
for archivePath, archiveOpener := range openers {
archiveReadCloser, err := archiveOpener.Open()
if err != nil {
return nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
}
nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath)
nestedPkgs, err := parseJavaArchive(nestedPath, archiveReadCloser)
if err != nil {
if closeErr := archiveReadCloser.Close(); closeErr != nil {
log.Errorf("unable to close archived file from tempdir: %+v", closeErr)
}
return nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err)
}
if err = archiveReadCloser.Close(); err != nil {
return nil, fmt.Errorf("unable to close archived file from tempdir: %w", err)
}
// attach the parent package to all discovered packages that are not already associated with a java archive
for _, p := range nestedPkgs {

View File

@ -35,6 +35,7 @@ func saveArchiveToTmp(reader io.Reader) (string, string, func(), error) {
if err != nil {
return contentDir, "", cleanupFn, fmt.Errorf("unable to create archive: %w", err)
}
defer archiveFile.Close()
_, err = io.Copy(archiveFile, reader)
if err != nil {