From 17a66f0186b7b6cd1d9da0e65aeec49bb6614e41 Mon Sep 17 00:00:00 2001
From: Kudryavcev Nikolay
Date: Fri, 27 Jun 2025 00:29:35 +0300
Subject: [PATCH] upgrade deprecated library for archiving

Signed-off-by: Kudryavcev Nikolay
---
 go.mod                                      |   3 -
 go.sum                                      |   6 -
 internal/file/tar_file_traversal.go         |  50 +++-
 internal/file/zip_file_manifest.go          |  20 +-
 internal/file/zip_file_manifest_test.go     |   7 +-
 internal/file/zip_file_traversal.go         | 107 ++++----
 internal/file/zip_file_traversal_test.go    |   5 +-
 internal/file/zip_read_closer.go            | 229 ------------------
 internal/file/zip_read_closer_test.go       |  50 ----
 internal/task/unknowns_tasks.go             |   7 +-
 syft/format/github/internal/model/model.go  |   7 +-
 syft/pkg/cataloger/java/archive_parser.go   |  24 +-
 .../java/tar_wrapped_archive_parser.go      |   2 +-
 .../java/zip_wrapped_archive_parser.go      |   2 +-
 syft/source/filesource/file_source.go       |  59 +++--
 15 files changed, 180 insertions(+), 398 deletions(-)
 delete mode 100644 internal/file/zip_read_closer.go
 delete mode 100644 internal/file/zip_read_closer_test.go

diff --git a/go.mod b/go.mod
index b8fc4bf92..da2570596 100644
--- a/go.mod
+++ b/go.mod
@@ -11,7 +11,6 @@ require (
 	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
 	github.com/acobaugh/osrelease v0.1.0
 	github.com/adrg/xdg v0.5.3
-	github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
 	github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
 	github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
 	github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
@@ -156,7 +155,6 @@ require (
 	github.com/goccy/go-yaml v1.18.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
-	github.com/golang/snappy v0.0.4 // indirect
 	github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
 	github.com/hashicorp/errwrap v1.1.0 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
@@ -192,7 +190,6 @@ require (
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
 	github.com/ncruces/go-strftime v0.1.9 // indirect
-	github.com/nwaples/rardecode v1.1.3 // indirect
 	github.com/nwaples/rardecode/v2 v2.1.0 // indirect
 	github.com/opencontainers/image-spec v1.1.1 // indirect
 	github.com/opencontainers/runtime-spec v1.1.0 // indirect
diff --git a/go.sum b/go.sum
index 60775755f..a8bd3e1e7 100644
--- a/go.sum
+++ b/go.sum
@@ -94,8 +94,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
 github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
-github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
-github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716/go.mod
@@ -410,8 +408,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS
 github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=
 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@@ -660,8 +656,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
 github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
 github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
 github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
-github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
-github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
 github.com/nwaples/rardecode/v2 v2.1.0 h1:JQl9ZoBPDy+nIZGb1mx8+anfHp/LV3NE2MjMiv0ct/U=
 github.com/nwaples/rardecode/v2 v2.1.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
 github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go
index 7d211168a..91278ac27 100644
--- a/internal/file/tar_file_traversal.go
+++ b/internal/file/tar_file_traversal.go
@@ -1,17 +1,43 @@
 package file
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 
+	"github.com/anchore/syft/internal/log"
 	"github.com/bmatcuk/doublestar/v4"
-
-	"github.com/anchore/archiver/v3"
+	"github.com/mholt/archives"
 )
 
+// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
+func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
+	tarReader, err := os.Open(archivePath)
+	if err != nil {
+		return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
+	}
+	defer func() {
+		if err := tarReader.Close(); err != nil {
+			log.Errorf("unable to close tar archive (%s): %+v", archivePath, err)
+		}
+	}()
+
+	format, _, err := archives.Identify(ctx, archivePath, nil)
+	if err != nil {
+		return fmt.Errorf("failed to identify tar compression format: %w", err)
+	}
+
+	extractor, ok := format.(archives.Extractor)
+	if !ok {
+		return fmt.Errorf("file format does not support extraction: %s", archivePath)
+	}
+
+	return extractor.Extract(ctx, tarReader, visitor)
+}
+
 // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
-func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) {
+func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
 	results := make(map[string]Opener)
 
 	// don't allow for full traversal, only select traversal from given paths
@@ -19,9 +45,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 		return results, nil
 	}
-	visitor := func(file archiver.File) error {
-		defer file.Close()
-
+	visitor := func(ctx context.Context, file archives.FileInfo) error {
 		// ignore directories
 		if file.IsDir() {
 			return nil
 		}
@@ -43,7 +67,17 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
 		defer tempFile.Close()
 
-		if err := safeCopy(tempFile, file.ReadCloser); err != nil {
+		packedFile, err := file.Open()
+		if err != nil {
+			return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
+		}
+		defer func() {
+			if err := packedFile.Close(); err != nil {
+				log.Errorf("unable to close source file=%q from tar=%q: %+v", file.NameInArchive, archivePath, err)
+			}
+		}()
+
+		if err := safeCopy(tempFile, packedFile); err != nil {
 			return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
 		}
 
@@ -52,7 +86,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
 		return nil
 	}
 
-	return results, archiver.Walk(archivePath, visitor)
+	return results, TraverseFilesInTar(ctx, archivePath, visitor)
 }
 
 func matchesAnyGlob(name string, globs ...string) bool {
diff --git a/internal/file/zip_file_manifest.go b/internal/file/zip_file_manifest.go
index 346e661c6..ef0cf0ede 100644
--- a/internal/file/zip_file_manifest.go
+++ b/internal/file/zip_file_manifest.go
@@ -1,35 +1,39 @@
 package file
 
 import (
+	"context"
 	"os"
 	"sort"
 	"strings"
 
-	"github.com/scylladb/go-set/strset"
-
 	"github.com/anchore/syft/internal/log"
+	"github.com/mholt/archives"
+	"github.com/scylladb/go-set/strset"
 )
 
 // ZipFileManifest is a collection of paths and their file metadata.
 type ZipFileManifest map[string]os.FileInfo
 
 // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
-func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
-	zipReader, err := OpenZip(archivePath)
+func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
+	zipReader, err := os.Open(archivePath)
 	manifest := make(ZipFileManifest)
 	if err != nil {
 		log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
 		return manifest, err
 	}
 	defer func() {
-		err = zipReader.Close()
-		if err != nil {
+		if err = zipReader.Close(); err != nil {
 			log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
 		}
 	}()
 
-	for _, file := range zipReader.File {
-		manifest.Add(file.Name, file.FileInfo())
+	err = archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
+		manifest.Add(file.NameInArchive, file.FileInfo)
+		return nil
+	})
+	if err != nil {
+		return manifest, err
 	}
 	return manifest, nil
 }
diff --git a/internal/file/zip_file_manifest_test.go b/internal/file/zip_file_manifest_test.go
index 75d445228..9ebe42224 100644
--- a/internal/file/zip_file_manifest_test.go
+++ b/internal/file/zip_file_manifest_test.go
@@ -4,6 +4,7 @@
 package file
 
 import (
+	"context"
 	"encoding/json"
 	"os"
 	"path"
@@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {
 
 	archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
 
-	actual, err := NewZipFileManifest(archiveFilePath)
+	actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
 	if err != nil {
 		t.Fatalf("unable to extract from unzip archive: %+v", err)
 	}
@@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
 	sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
 	archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
 
-	actual, err := NewZipFileManifest(archiveFilePath)
+	actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
 	if err != nil {
 		t.Fatalf("unable to extract from unzip archive: %+v", err)
 	}
@@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
 
 	archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
 
-	z, err := NewZipFileManifest(archiveFilePath)
+	z, err := NewZipFileManifest(context.Background(), archiveFilePath)
 	if err != nil {
 		t.Fatalf("unable to extract from unzip archive: %+v", err)
 	}
diff --git a/internal/file/zip_file_traversal.go b/internal/file/zip_file_traversal.go
index 1b712eff5..d5b019b81 100644
--- a/internal/file/zip_file_traversal.go
+++ b/internal/file/zip_file_traversal.go
@@ -1,14 +1,15 @@
 package file
 
 import (
-	"archive/zip"
 	"bytes"
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 
 	"github.com/anchore/syft/internal/log"
+	"github.com/mholt/archives"
 )
 
 const (
@@ -39,38 +40,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
 }
 
 // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
-func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
+func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
 	request := newZipTraverseRequest(paths...)
-	zipReader, err := OpenZip(archivePath)
+	zipReader, err := os.Open(archivePath)
 	if err != nil {
 		return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
 	}
 	defer func() {
-		err = zipReader.Close()
-		if err != nil {
+		if err := zipReader.Close(); err != nil {
 			log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
 		}
 	}()
 
-	for _, file := range zipReader.File {
+	return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
 		// if no paths are given then assume that all files should be traversed
 		if len(paths) > 0 {
-			if _, ok := request[file.Name]; !ok {
+			if _, ok := request[file.NameInArchive]; !ok {
 				// this file path is not of interest
-				continue
+				return nil
 			}
 		}
 
-		if err = visitor(file); err != nil {
-			return err
-		}
-	}
-	return nil
+		return visitor(ctx, file)
+	})
 }
 
 // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
-func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
+func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
 	results := make(map[string]Opener)
 
 	// don't allow for full traversal, only select traversal from given paths
@@ -78,9 +75,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
 		return results, nil
 	}
-	visitor := func(file *zip.File) error {
-		tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
-
+	visitor := func(ctx context.Context, file archives.FileInfo) error {
+		tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
 		tempFile, err := os.CreateTemp(dir, tempfilePrefix)
 		if err != nil {
 			return fmt.Errorf("unable to create temp file: %w", err)
 		}
@@ -92,33 +88,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
 
 		zippedFile, err := file.Open()
 		if err != nil {
-			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 		}
 		defer func() {
-			err := zippedFile.Close()
-			if err != nil {
-				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
+			if err := zippedFile.Close(); err != nil {
+				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
 			}
 		}()
 
-		if file.FileInfo().IsDir() {
-			return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
+		if file.IsDir() {
+			return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
 		}
 
 		if err := safeCopy(tempFile, zippedFile); err != nil {
-			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
+			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
 		}
 
-		results[file.Name] = Opener{path: tempFile.Name()}
+		results[file.NameInArchive] = Opener{path: tempFile.Name()}
 
 		return nil
 	}
 
-	return results, TraverseFilesInZip(archivePath, visitor, paths...)
+	return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
 }
 
 // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
-func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
+func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
 	results := make(map[string]string)
 
 	// don't allow for full traversal, only select traversal from given paths
@@ -126,37 +121,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
 		return results, nil
 	}
 
-	visitor := func(file *zip.File) error {
+	visitor := func(ctx context.Context, file archives.FileInfo) error {
 		zippedFile, err := file.Open()
 		if err != nil {
-			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 		}
+		defer func() {
+			if err := zippedFile.Close(); err != nil {
+				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
+			}
+		}()
 
-		if file.FileInfo().IsDir() {
-			return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
+		if file.IsDir() {
+			return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
 		}
 
 		var buffer bytes.Buffer
 		if err := safeCopy(&buffer, zippedFile); err != nil {
-			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
+			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
 		}
 
-		results[file.Name] = buffer.String()
+		results[file.NameInArchive] = buffer.String()
 
-		err = zippedFile.Close()
-		if err != nil {
-			return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
-		}
 		return nil
 	}
 
-	return results, TraverseFilesInZip(archivePath, visitor, paths...)
+	return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
 }
 
 // UnzipToDir extracts a zip archive to a target directory.
-func UnzipToDir(archivePath, targetDir string) error {
-	visitor := func(file *zip.File) error {
-		joinedPath, err := safeJoin(targetDir, file.Name)
+func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
+	visitor := func(ctx context.Context, file archives.FileInfo) error {
+		joinedPath, err := safeJoin(targetDir, file.NameInArchive)
 		if err != nil {
 			return err
 		}
@@ -164,7 +160,7 @@ func UnzipToDir(archivePath, targetDir string) error {
 		return extractSingleFile(file, joinedPath, archivePath)
 	}
 
-	return TraverseFilesInZip(archivePath, visitor)
+	return TraverseFilesInZip(ctx, archivePath, visitor)
 }
 
 // safeJoin ensures that any destinations do not resolve to a path above the prefix path.
@@ -181,13 +177,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
 	return joinResult, nil
 }
 
-func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
+func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
 	zippedFile, err := file.Open()
 	if err != nil {
-		return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
+		return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
 	}
+	defer func() {
+		if err := zippedFile.Close(); err != nil {
+			log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
+		}
+	}()
 
-	if file.FileInfo().IsDir() {
+	if file.IsDir() {
 		err = os.MkdirAll(expandedFilePath, file.Mode())
 		if err != nil {
 			return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
@@ -202,20 +203,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
 		if err != nil {
 			return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
 		}
+		defer func() {
+			if err := outputFile.Close(); err != nil {
+				log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
+			}
+		}()
 
 		if err := safeCopy(outputFile, zippedFile); err != nil {
-			return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
-		}
-
-		err = outputFile.Close()
-		if err != nil {
-			return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
+			return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
 		}
 	}
 
-	err = zippedFile.Close()
-	if err != nil {
-		return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
-	}
 	return nil
 }
diff --git a/internal/file/zip_file_traversal_test.go b/internal/file/zip_file_traversal_test.go
index d5a81d273..eecee862b 100644
--- a/internal/file/zip_file_traversal_test.go
+++ b/internal/file/zip_file_traversal_test.go
@@ -4,6 +4,7 @@
 package file
 
 import (
+	"context"
 	"crypto/sha256"
 	"encoding/json"
 	"errors"
@@ -55,7 +56,7 @@ func TestUnzipToDir(t *testing.T) {
 	expectedPaths := len(expectedZipArchiveEntries)
 	observedPaths := 0
 
-	err = UnzipToDir(archiveFilePath, unzipDestinationDir)
+	err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
 	if err != nil {
 		t.Fatalf("unable to unzip archive: %+v", err)
 	}
@@ -145,7 +146,7 @@ func TestContentsFromZip(t *testing.T) {
 		paths = append(paths, p)
 	}
-	actual, err := ContentsFromZip(archivePath, paths...)
+	actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
 	if err != nil {
 		t.Fatalf("unable to extract from unzip archive: %+v", err)
 	}
 }
diff --git a/internal/file/zip_read_closer.go b/internal/file/zip_read_closer.go
deleted file mode 100644
index fd45f52a1..000000000
--- a/internal/file/zip_read_closer.go
+++ /dev/null
@@ -1,229 +0,0 @@
-package file
-
-import (
-	"archive/zip"
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"io"
-	"math"
-	"os"
-
-	"github.com/anchore/syft/internal/log"
-)
-
-// directoryEndLen, readByf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
-// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
-// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
-// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
-
-const (
-	directoryEndLen         = 22
-	directory64LocLen       = 20
-	directory64EndLen       = 56
-	directory64LocSignature = 0x07064b50
-	directory64EndSignature = 0x06064b50
-)
-
-// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
-// that have bytes prefixed to the front of the archive (common with self-extracting jars).
-type ZipReadCloser struct {
-	*zip.Reader
-	io.Closer
-}
-
-// OpenZip provides a ZipReadCloser for the given filepath.
-func OpenZip(filepath string) (*ZipReadCloser, error) {
-	f, err := os.Open(filepath)
-	if err != nil {
-		return nil, err
-	}
-	fi, err := f.Stat()
-	if err != nil {
-		f.Close()
-		return nil, err
-	}
-
-	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
-	// need to find the start of the archive and keep track of this offset.
-	offset, err := findArchiveStartOffset(f, fi.Size())
-	if err != nil {
-		log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
-		return nil, err
-	}
-
-	if _, err := f.Seek(0, io.SeekStart); err != nil {
-		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
-	}
-
-	if offset > math.MaxInt64 {
-		return nil, fmt.Errorf("archive start offset too large: %v", offset)
-	}
-	offset64 := int64(offset)
-
-	size := fi.Size() - offset64
-
-	r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
-	if err != nil {
-		log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
-		return nil, err
-	}
-
-	return &ZipReadCloser{
-		Reader: r,
-		Closer: f,
-	}, nil
-}
-
-type readBuf []byte
-
-func (b *readBuf) uint16() uint16 {
-	v := binary.LittleEndian.Uint16(*b)
-	*b = (*b)[2:]
-	return v
-}
-
-func (b *readBuf) uint32() uint32 {
-	v := binary.LittleEndian.Uint32(*b)
-	*b = (*b)[4:]
-	return v
-}
-
-func (b *readBuf) uint64() uint64 {
-	v := binary.LittleEndian.Uint64(*b)
-	*b = (*b)[8:]
-	return v
-}
-
-type directoryEnd struct {
-	diskNbr            uint32 // unused
-	dirDiskNbr         uint32 // unused
-	dirRecordsThisDisk uint64 // unused
-	directoryRecords   uint64
-	directorySize      uint64
-	directoryOffset    uint64 // relative to file
-}
-
-// note: this is derived from readDirectoryEnd within the archive/zip package
-func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
-	// look for directoryEndSignature in the last 1k, then in the last 65k
-	var buf []byte
-	var directoryEndOffset int64
-	for i, bLen := range []int64{1024, 65 * 1024} {
-		if bLen > size {
-			bLen = size
-		}
-		buf = make([]byte, int(bLen))
-		if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
-			return 0, err
-		}
-		if p := findSignatureInBlock(buf); p >= 0 {
-			buf = buf[p:]
-			directoryEndOffset = size - bLen + int64(p)
-			break
-		}
-		if i == 1 || bLen == size {
-			return 0, zip.ErrFormat
-		}
-	}
-
-	if buf == nil {
-		// we were unable to find the directoryEndSignature block
-		return 0, zip.ErrFormat
-	}
-
-	// read header into struct
-	b := readBuf(buf[4:]) // skip signature
-	d := &directoryEnd{
-		diskNbr:            uint32(b.uint16()),
-		dirDiskNbr:         uint32(b.uint16()),
-		dirRecordsThisDisk: uint64(b.uint16()),
-		directoryRecords:   uint64(b.uint16()),
-		directorySize:      uint64(b.uint32()),
-		directoryOffset:    uint64(b.uint32()),
-	}
-	// Calculate where the zip data actually begins
-
-	// These values mean that the file can be a zip64 file
-	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
-		p, err := findDirectory64End(r, directoryEndOffset)
-		if err == nil && p >= 0 {
-			directoryEndOffset = p
-			err = readDirectory64End(r, p, d)
-		}
-		if err != nil {
-			return 0, err
-		}
-	}
-	startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
-
-	// Make sure directoryOffset points to somewhere in our file.
-	if d.directoryOffset >= uint64(size) {
-		return 0, zip.ErrFormat
-	}
-	return startOfArchive, nil
-}
-
-// findDirectory64End tries to read the zip64 locator just before the
-// directory end and returns the offset of the zip64 directory end if
-// found.
-func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
-	locOffset := directoryEndOffset - directory64LocLen
-	if locOffset < 0 {
-		return -1, nil // no need to look for a header outside the file
-	}
-	buf := make([]byte, directory64LocLen)
-	if _, err := r.ReadAt(buf, locOffset); err != nil {
-		return -1, err
-	}
-	b := readBuf(buf)
-	if sig := b.uint32(); sig != directory64LocSignature {
-		return -1, nil
-	}
-	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
-		return -1, nil // the file is not a valid zip64-file
-	}
-	p := b.uint64() // relative offset of the zip64 end of central directory record
-	if b.uint32() != 1 { // total number of disks
-		return -1, nil // the file is not a valid zip64-file
-	}
-	return int64(p), nil
-}
-
-// readDirectory64End reads the zip64 directory end and updates the
-// directory end with the zip64 directory end values.
-func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
-	buf := make([]byte, directory64EndLen)
-	if _, err := r.ReadAt(buf, offset); err != nil {
-		return err
-	}
-
-	b := readBuf(buf)
-	if sig := b.uint32(); sig != directory64EndSignature {
-		return errors.New("could not read directory64End")
-	}
-
-	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
-	d.diskNbr = b.uint32()            // number of this disk
-	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
-	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
-	d.directoryRecords = b.uint64()   // total number of entries in the central directory
-	d.directorySize = b.uint64()      // size of the central directory
-	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number
-
-	return nil
-}
-
-func findSignatureInBlock(b []byte) int {
-	for i := len(b) - directoryEndLen; i >= 0; i-- {
-		// defined from directoryEndSignature
-		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
-			// n is length of comment
-			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
-			if n+directoryEndLen+i <= len(b) {
-				return i
-			}
-		}
-	}
-	return -1
-}
diff --git a/internal/file/zip_read_closer_test.go b/internal/file/zip_read_closer_test.go
deleted file mode 100644
index 349bfcc9b..000000000
--- a/internal/file/zip_read_closer_test.go
+++ /dev/null
@@ -1,50 +0,0 @@
-//go:build !windows
-// +build !windows
-
-package file
-
-import (
-	"os"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-func TestFindArchiveStartOffset(t *testing.T) {
-	tests := []struct {
-		name        string
-		archivePrep func(tb testing.TB) string
-		expected    uint64
-	}{
-		{
-			name:        "standard, non-nested zip",
-			archivePrep: prepZipSourceFixture,
-			expected:    0,
-		},
-		{
-			name:        "zip with prepended bytes",
-			archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
-			expected:    36,
-		},
-	}
-
-	for _, test := range tests {
-		t.Run(test.name, func(t *testing.T) {
-			archivePath := test.archivePrep(t)
-			f, err := os.Open(archivePath)
-			if err != nil {
-				t.Fatalf("could not open archive %q: %+v", archivePath, err)
-			}
-			fi, err := os.Stat(f.Name())
-			if err != nil {
-				t.Fatalf("unable to stat archive: %+v", err)
-			}
-
-			actual, err := findArchiveStartOffset(f, fi.Size())
-			if err != nil {
-				t.Fatalf("unable to find offset: %+v", err)
-			}
-			assert.Equal(t, test.expected, actual)
-		})
-	}
-}
diff --git a/internal/task/unknowns_tasks.go b/internal/task/unknowns_tasks.go
index 0b8959bd0..fefc380bb 100644
--- a/internal/task/unknowns_tasks.go
+++ b/internal/task/unknowns_tasks.go
@@ -4,13 +4,13 @@ import (
 	"context"
 	"strings"
 
-	"github.com/anchore/archiver/v3"
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/internal/sbomsync"
 	"github.com/anchore/syft/syft/cataloging"
 	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/sbom"
+	"github.com/mholt/archives"
 )
 
 const unknownsLabelerTaskName = "unknowns-labeler"
@@ -57,9 +57,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
 	}
 
 	if c.IncludeUnexpandedArchives {
+		ctx := context.Background()
 		for coords := range s.Artifacts.FileMetadata {
-			unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath)
-			if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) {
+			format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
+			if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
 				s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
 			}
 		}
diff --git a/syft/format/github/internal/model/model.go b/syft/format/github/internal/model/model.go
index 942176314..ef0b46a3c 100644
--- a/syft/format/github/internal/model/model.go
+++ b/syft/format/github/internal/model/model.go
@@ -1,16 +1,17 @@
 package model
 
 import (
+	"context"
 	"fmt"
 	"strings"
 	"time"
 
-	"github.com/anchore/archiver/v3"
 	"github.com/anchore/packageurl-go"
 	"github.com/anchore/syft/internal/log"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/sbom"
 	"github.com/anchore/syft/syft/source"
+	"github.com/mholt/archives"
 )
 
 // ToGithubModel converts the provided SBOM to a GitHub dependency model
@@ -145,8 +146,8 @@ func trimRelative(s string) string {
 
 // isArchive returns true if the path appears to be an archive
 func isArchive(path string) bool {
-	_, err := archiver.ByExtension(path)
-	return err == nil
+	format, _, err := archives.Identify(context.Background(), path, nil)
+	return err == nil && format != nil
 }
 
 func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {
diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go
index d06b65872..7a04fa9ec 100644
--- a/syft/pkg/cataloger/java/archive_parser.go
+++ b/syft/pkg/cataloger/java/archive_parser.go
@@ -110,7 +110,7 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d
 		return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
 	}
 
-	fileManifest, err := intFile.NewZipFileManifest(archivePath)
+	fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
 	if err != nil {
 		return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
 	}
@@ -228,7 +228,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
 	}
 
 	// fetch the manifest file
-	contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...)
+	contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
 	}
@@ -387,8 +387,8 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro
 	var pomProperties pkg.JavaPomProperties
 
 	// Find the pom.properties/pom.xml if the names seem like a plausible match
-	properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
-	projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
+	properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
+	projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
 
 	// map of all the artifacts in the pom properties, in order to chek exact match with the filename
 	artifactsMap := make(map[string]bool)
@@ -453,13 +453,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
 	var pkgs []pkg.Package
 
 	// pom.properties
-	properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
+	properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
 	if err != nil {
 		return nil, err
 	}
 
 	// pom.xml
-	projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
+	projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
 	if err != nil {
 		return nil, err
 	}
@@ -505,7 +505,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
 	}
 
 	if len(licenseMatches) > 0 {
-		contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...)
+		contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
 		if err != nil {
 			return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
 		}
@@ -533,7 +533,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
 // associating each discovered package to the given parent package.
 func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
 	// search and parse pom.properties files & fetch the contents
-	openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
+	openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
 	}
@@ -597,8 +597,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
 	return nestedPkgs, nestedRelationships, nil
 }
 
-func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
-	contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
+func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
+	contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)
 	}
@@ -626,8 +626,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
 	return propertiesByParentPath, nil
 }
 
-func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
-	contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
+func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
+	contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
 	if err != nil {
 		return nil, fmt.Errorf("unable to extract maven files: %w", err)
 	}
diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
index 5af4f0b3f..4c4edc595 100644
--- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
+++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
@@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
 }
 
 func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
-	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
+	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
 	}
diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go
index 3dd1d2524..e515f4f90 100644
--- a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go
+++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go
@@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
 	// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
 	// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
 	// or archiver).
-	fileManifest, err := intFile.NewZipFileManifest(archivePath)
+	fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
 	}
diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go
index 09a4422ce..ea7d44a0a 100644
--- a/syft/source/filesource/file_source.go
+++ b/syft/source/filesource/file_source.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto"
 	"fmt"
+	"io"
 	"os"
 	"path"
 	"path/filepath"
@@ -11,7 +12,6 @@ import (
 
 	"github.com/opencontainers/go-digest"
 
-	"github.com/anchore/archiver/v3"
 	stereoFile "github.com/anchore/stereoscope/pkg/file"
 	intFile "github.com/anchore/syft/internal/file"
 	"github.com/anchore/syft/internal/log"
@@ -21,6 +21,7 @@ import (
 	"github.com/anchore/syft/syft/source"
 	"github.com/anchore/syft/syft/source/directorysource"
 	"github.com/anchore/syft/syft/source/internal"
+	"github.com/mholt/archives"
 )
 
 var _ source.Source = (*fileSource)(nil)
@@ -223,15 +224,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
 	// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
 	// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
 	// unarchived.
-	envelopedUnarchiver, err := archiver.ByExtension(path)
-	if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
-		if tar, ok := unarchiver.(*archiver.Tar); ok {
-			// when tar files are extracted, if there are multiple entries at the same
-			// location, the last entry wins
-			// NOTE: this currently does not display any messages if an overwrite happens
-			tar.OverwriteExisting = true
-		}
-
+	envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
+	if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
 		analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
 		if err != nil {
 			return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@@ -256,15 +250,52 @@ func digestOfFileContents(path string) string {
 	return di.String()
 }
 
-func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
+func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
+	archive, err := os.Open(path)
+	if err != nil {
+		return "", func() error { return nil }, fmt.Errorf("unable to open archive: %w", err)
+	}
+	defer archive.Close()
+
 	tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
 	if err != nil {
 		return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
 	}
 
-	cleanupFn := func() error {
-		return os.RemoveAll(tempDir)
+	visitor := func(ctx context.Context, file archives.FileInfo) error {
+		destPath := filepath.Join(tempDir, file.NameInArchive)
+		if file.IsDir() {
+			return os.MkdirAll(destPath, file.Mode())
+		}
+
+		if err := os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
+			return fmt.Errorf("failed to create parent directory: %w", err)
+		}
+
+		rc, err := file.Open()
+		if err != nil {
+			return fmt.Errorf("failed to open file in archive: %w", err)
+		}
+		defer rc.Close()
+
+		destFile, err := os.Create(destPath)
+		if err != nil {
+			return fmt.Errorf("failed to create file in destination: %w", err)
+		}
+		defer destFile.Close()
+
+		if err := destFile.Chmod(file.Mode()); err != nil {
+			return fmt.Errorf("failed to change mode of destination file: %w", err)
+		}
+
+		if _, err := io.Copy(destFile, rc); err != nil {
+			return fmt.Errorf("failed to copy file contents: %w", err)
+		}
+
+		return nil
 	}
 
-	return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
+	return tempDir, func() error {
+		return os.RemoveAll(tempDir)
+	}, unarchiver.Extract(context.Background(), archive, visitor)
 }
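
Reviewer note (not part of the patch): the changes above standardize on the github.com/mholt/archives flow of Identify + Extractor.Extract with a FileHandler visitor. The following is a minimal, self-contained sketch of that flow using only the API already exercised in the diff; the listArchive helper and the CLI wrapper are illustrative names, not code from syft.

```go
package main

import (
	"context"
	"fmt"
	"io"
	"os"

	"github.com/mholt/archives"
)

// listArchive prints every regular-file entry in an archive using the same
// Identify + Extract visitor pattern adopted by this patch.
func listArchive(ctx context.Context, archivePath string) error {
	f, err := os.Open(archivePath)
	if err != nil {
		return fmt.Errorf("unable to open archive: %w", err)
	}
	defer f.Close()

	// Passing a nil stream makes Identify match by file name/extension only,
	// mirroring the old archiver.ByExtension behavior relied on in
	// unknowns_tasks.go and model.go; pass the opened stream to also sniff headers.
	format, _, err := archives.Identify(ctx, archivePath, nil)
	if err != nil {
		return fmt.Errorf("unable to identify archive format: %w", err)
	}

	extractor, ok := format.(archives.Extractor)
	if !ok {
		return fmt.Errorf("format does not support extraction: %s", archivePath)
	}

	// The visitor receives one archives.FileInfo per entry; NameInArchive and
	// Open() replace the zip.File / archiver.File accessors used before the migration.
	return extractor.Extract(ctx, f, func(ctx context.Context, file archives.FileInfo) error {
		if file.IsDir() {
			return nil
		}
		rc, err := file.Open()
		if err != nil {
			return err
		}
		defer rc.Close()
		n, err := io.Copy(io.Discard, rc)
		if err != nil {
			return err
		}
		fmt.Printf("%s (%d bytes)\n", file.NameInArchive, n)
		return nil
	})
}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: listarchive <archive>")
		os.Exit(1)
	}
	if err := listArchive(context.Background(), os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```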
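A second hedged sketch: the zip helpers in zip_file_traversal.go guard destination paths with safeJoin before writing, and the same idea can be applied by callers of a visitor-based extraction such as the unarchiveToTmp visitor in file_source.go. safeDestination below is a hypothetical helper name, not part of the patch; it only illustrates the guard.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// safeDestination joins an archive entry name onto a destination directory and
// rejects names that would escape that directory (the same check safeJoin performs).
func safeDestination(destDir, nameInArchive string) (string, error) {
	joined := filepath.Join(destDir, nameInArchive)
	cleanDir := filepath.Clean(destDir) + string(os.PathSeparator)
	if !strings.HasPrefix(joined, cleanDir) {
		return "", fmt.Errorf("potential path traversal in archive entry: %q", nameInArchive)
	}
	return joined, nil
}

func main() {
	// "docs/readme.md" is allowed; "../escape" resolves outside the directory and is rejected.
	for _, name := range []string{"docs/readme.md", "../escape"} {
		dest, err := safeDestination("/tmp/syft-archive-contents-123", name)
		fmt.Println(dest, err)
	}
}
```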