mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 08:53:15 +01:00
reduce number of open files while processing nested java archives (#227)
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent
c0b9d7854b
commit
bb14f3b45b
14
internal/file/opener.go
Normal file
14
internal/file/opener.go
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Opener is a lightweight handle to a file on disk, identified by its path. It holds no open file
// descriptor itself; a descriptor is only acquired when Open is called.
type Opener struct {
	// path is the location handed to os.Open each time Open is called.
	path string
}

// Open opens the file at the Opener's path for reading and returns it as an io.ReadCloser. Every call
// opens the file anew, and the caller is responsible for closing the returned value.
//
// On failure the error from os.Open is returned unchanged, together with a nil io.ReadCloser.
func (o Opener) Open() (io.ReadCloser, error) {
	// Do not return os.Open's results directly: on error the nil *os.File would be converted into a
	// non-nil io.ReadCloser wrapping a nil pointer, which defeats nil checks made by callers.
	f, err := os.Open(o.path)
	if err != nil {
		return nil, err
	}
	return f, nil
}
|
||||||
@ -63,8 +63,8 @@ func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]io.Reader, error) {
|
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
|
||||||
results := make(map[string]io.Reader)
|
results := make(map[string]Opener)
|
||||||
|
|
||||||
// don't allow for full traversal, only select traversal from given paths
|
// don't allow for full traversal, only select traversal from given paths
|
||||||
if len(paths) == 0 {
|
if len(paths) == 0 {
|
||||||
@ -78,11 +78,21 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to create temp file: %w", err)
|
return fmt.Errorf("unable to create temp file: %w", err)
|
||||||
}
|
}
|
||||||
|
// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
|
||||||
|
// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
|
||||||
|
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
|
||||||
|
defer tempFile.Close()
|
||||||
|
|
||||||
zippedFile, err := file.Open()
|
zippedFile, err := file.Open()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
|
||||||
}
|
}
|
||||||
|
defer func() {
|
||||||
|
err := zippedFile.Close()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
if file.FileInfo().IsDir() {
|
if file.FileInfo().IsDir() {
|
||||||
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
|
||||||
@ -103,12 +113,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
|
|||||||
return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err)
|
return fmt.Errorf("unable to reset file pointer (%s): %w", tempFile.Name(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
results[file.Name] = tempFile
|
results[file.Name] = Opener{path: tempFile.Name()}
|
||||||
|
|
||||||
err = zippedFile.Close()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,8 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
|
||||||
"github.com/anchore/syft/internal"
|
"github.com/anchore/syft/internal"
|
||||||
"github.com/anchore/syft/internal/file"
|
"github.com/anchore/syft/internal/file"
|
||||||
"github.com/anchore/syft/syft/cataloger/common"
|
"github.com/anchore/syft/syft/cataloger/common"
|
||||||
@ -214,18 +216,28 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) (
|
|||||||
}
|
}
|
||||||
|
|
||||||
// search and parse pom.properties files & fetch the contents
|
// search and parse pom.properties files & fetch the contents
|
||||||
readers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
|
openers, err := file.ExtractFromZipToUniqueTempFile(j.archivePath, j.contentPath, j.fileManifest.GlobMatch(archiveFormatGlobs...)...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to extract files from zip: %w", err)
|
return nil, fmt.Errorf("unable to extract files from zip: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// discover nested artifacts
|
// discover nested artifacts
|
||||||
for archivePath, archiveReader := range readers {
|
for archivePath, archiveOpener := range openers {
|
||||||
nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath)
|
archiveReadCloser, err := archiveOpener.Open()
|
||||||
nestedPkgs, err := parseJavaArchive(nestedPath, archiveReader)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
|
||||||
|
}
|
||||||
|
nestedPath := fmt.Sprintf("%s:%s", j.virtualPath, archivePath)
|
||||||
|
nestedPkgs, err := parseJavaArchive(nestedPath, archiveReadCloser)
|
||||||
|
if err != nil {
|
||||||
|
if closeErr := archiveReadCloser.Close(); closeErr != nil {
|
||||||
|
log.Errorf("unable to close archived file from tempdir: %+v", closeErr)
|
||||||
|
}
|
||||||
return nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err)
|
return nil, fmt.Errorf("unable to process nested java archive (%s): %w", archivePath, err)
|
||||||
}
|
}
|
||||||
|
if err = archiveReadCloser.Close(); err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to close archived file from tempdir: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// attach the parent package to all discovered packages that are not already associated with a java archive
|
// attach the parent package to all discovered packages that are not already associated with a java archive
|
||||||
for _, p := range nestedPkgs {
|
for _, p := range nestedPkgs {
|
||||||
|
|||||||
@ -35,6 +35,7 @@ func saveArchiveToTmp(reader io.Reader) (string, string, func(), error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return contentDir, "", cleanupFn, fmt.Errorf("unable to create archive: %w", err)
|
return contentDir, "", cleanupFn, fmt.Errorf("unable to create archive: %w", err)
|
||||||
}
|
}
|
||||||
|
defer archiveFile.Close()
|
||||||
|
|
||||||
_, err = io.Copy(archiveFile, reader)
|
_, err = io.Copy(archiveFile, reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user