upgrade deprecated library for archiving

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>
This commit is contained in:
Kudryavcev Nikolay 2025-06-27 00:29:35 +03:00
parent 4eb8ba4575
commit 17a66f0186
15 changed files with 180 additions and 398 deletions

3
go.mod
View File

@ -11,7 +11,6 @@ require (
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/acobaugh/osrelease v0.1.0 github.com/acobaugh/osrelease v0.1.0
github.com/adrg/xdg v0.5.3 github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2 github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
@ -156,7 +155,6 @@ require (
github.com/goccy/go-yaml v1.18.0 // indirect github.com/goccy/go-yaml v1.18.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
@ -192,7 +190,6 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/nwaples/rardecode/v2 v2.1.0 // indirect github.com/nwaples/rardecode/v2 v2.1.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect

6
go.sum
View File

@ -94,8 +94,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA= github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
@ -410,8 +408,6 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS
github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM= github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -660,8 +656,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0= github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nwaples/rardecode/v2 v2.1.0 h1:JQl9ZoBPDy+nIZGb1mx8+anfHp/LV3NE2MjMiv0ct/U= github.com/nwaples/rardecode/v2 v2.1.0 h1:JQl9ZoBPDy+nIZGb1mx8+anfHp/LV3NE2MjMiv0ct/U=
github.com/nwaples/rardecode/v2 v2.1.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= github.com/nwaples/rardecode/v2 v2.1.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=

View File

@ -1,17 +1,43 @@
package file package file
import ( import (
"context"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"github.com/anchore/syft/internal/log"
"github.com/bmatcuk/doublestar/v4" "github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives"
"github.com/anchore/archiver/v3"
) )
// TraverseFilesInTar opens the archive at archivePath, identifies its format, and passes each
// entry of the archive to visitor.
//
// An error is returned when the archive cannot be opened, when its format cannot be identified,
// when the identified format does not implement archives.Extractor, or when extraction (including
// the visitor) fails. A failure to close the archive is logged rather than returned.
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
	src, openErr := os.Open(archivePath)
	if openErr != nil {
		return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, openErr)
	}
	defer func() {
		closeErr := src.Close()
		if closeErr != nil {
			log.Errorf("unable to close tar archive (%s): %+v", archivePath, closeErr)
		}
	}()

	// No stream is handed to Identify (third argument is nil), so the second return value is discarded.
	// NOTE(review): presumably detection then relies on the file name alone — confirm against the archives docs.
	detected, _, identifyErr := archives.Identify(ctx, archivePath, nil)
	if identifyErr != nil {
		return fmt.Errorf("failed to identify tar compression format: %w", identifyErr)
	}

	if ex, supported := detected.(archives.Extractor); supported {
		return ex.Extract(ctx, src, visitor)
	}
	return fmt.Errorf("file format does not support extraction: %s", archivePath)
}
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted. // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) { func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
results := make(map[string]Opener) results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -19,9 +45,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return results, nil return results, nil
} }
visitor := func(file archiver.File) error { visitor := func(ctx context.Context, file archives.FileInfo) error {
defer file.Close()
// ignore directories // ignore directories
if file.IsDir() { if file.IsDir() {
return nil return nil
@ -43,7 +67,17 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly. // provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close() defer tempFile.Close()
if err := safeCopy(tempFile, file.ReadCloser); err != nil { packedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
}
defer func() {
if err := packedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from tar=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if err := safeCopy(tempFile, packedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
} }
@ -52,7 +86,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return nil return nil
} }
return results, archiver.Walk(archivePath, visitor) return results, TraverseFilesInTar(ctx, archivePath, visitor)
} }
func matchesAnyGlob(name string, globs ...string) bool { func matchesAnyGlob(name string, globs ...string) bool {

View File

@ -1,35 +1,39 @@
package file package file
import ( import (
"context"
"os" "os"
"sort" "sort"
"strings" "strings"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/mholt/archives"
"github.com/scylladb/go-set/strset"
) )
// ZipFileManifest is a collection of paths and their file metadata. // ZipFileManifest is a collection of paths and their file metadata.
type ZipFileManifest map[string]os.FileInfo type ZipFileManifest map[string]os.FileInfo
// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path. // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) { func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
zipReader, err := OpenZip(archivePath) zipReader, err := os.Open(archivePath)
manifest := make(ZipFileManifest) manifest := make(ZipFileManifest)
if err != nil { if err != nil {
log.Debugf("unable to open zip archive (%s): %v", archivePath, err) log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
return manifest, err return manifest, err
} }
defer func() { defer func() {
err = zipReader.Close() if err = zipReader.Close(); err != nil {
if err != nil {
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err) log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
} }
}() }()
for _, file := range zipReader.File { err = archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
manifest.Add(file.Name, file.FileInfo()) manifest.Add(file.NameInArchive, file.FileInfo)
return nil
})
if err != nil {
return manifest, err
} }
return manifest, nil return manifest, nil
} }

View File

@ -4,6 +4,7 @@
package file package file
import ( import (
"context"
"encoding/json" "encoding/json"
"os" "os"
"path" "path"
@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false) archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
actual, err := NewZipFileManifest(archiveFilePath) actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }
@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source") sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
archiveFilePath := setupZipFileTest(t, sourceDirPath, true) archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
actual, err := NewZipFileManifest(archiveFilePath) actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }
@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false) archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
z, err := NewZipFileManifest(archiveFilePath) z, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }

View File

@ -1,14 +1,15 @@
package file package file
import ( import (
"archive/zip"
"bytes" "bytes"
"context"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/mholt/archives"
) )
const ( const (
@ -39,38 +40,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
} }
// TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern. // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error { func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
request := newZipTraverseRequest(paths...) request := newZipTraverseRequest(paths...)
zipReader, err := OpenZip(archivePath) zipReader, err := os.Open(archivePath)
if err != nil { if err != nil {
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err) return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
} }
defer func() { defer func() {
err = zipReader.Close() if err := zipReader.Close(); err != nil {
if err != nil {
log.Errorf("unable to close zip archive (%s): %+v", archivePath, err) log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
} }
}() }()
for _, file := range zipReader.File { return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
// if no paths are given then assume that all files should be traversed // if no paths are given then assume that all files should be traversed
if len(paths) > 0 { if len(paths) > 0 {
if _, ok := request[file.Name]; !ok { if _, ok := request[file.NameInArchive]; !ok {
// this file path is not of interest // this file path is not of interest
continue return nil
} }
} }
if err = visitor(file); err != nil { return visitor(ctx, file)
return err })
}
}
return nil
} }
// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted. // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
results := make(map[string]Opener) results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -78,9 +75,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return results, nil return results, nil
} }
visitor := func(file *zip.File) error { visitor := func(ctx context.Context, file archives.FileInfo) error {
tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-" tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
tempFile, err := os.CreateTemp(dir, tempfilePrefix) tempFile, err := os.CreateTemp(dir, tempfilePrefix)
if err != nil { if err != nil {
return fmt.Errorf("unable to create temp file: %w", err) return fmt.Errorf("unable to create temp file: %w", err)
@ -92,33 +88,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() { defer func() {
err := zippedFile.Close() if err := zippedFile.Close(); err != nil {
if err != nil { log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
} }
}() }()
if file.FileInfo().IsDir() { if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name) return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
} }
if err := safeCopy(tempFile, zippedFile); err != nil { if err := safeCopy(tempFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
} }
results[file.Name] = Opener{path: tempFile.Name()} results[file.NameInArchive] = Opener{path: tempFile.Name()}
return nil return nil
} }
return results, TraverseFilesInZip(archivePath, visitor, paths...) return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
} }
// ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path. // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) { func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
results := make(map[string]string) results := make(map[string]string)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -126,37 +121,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
return results, nil return results, nil
} }
visitor := func(file *zip.File) error { visitor := func(ctx context.Context, file archives.FileInfo) error {
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() { if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name) return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
} }
var buffer bytes.Buffer var buffer bytes.Buffer
if err := safeCopy(&buffer, zippedFile); err != nil { if err := safeCopy(&buffer, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
} }
results[file.Name] = buffer.String() results[file.NameInArchive] = buffer.String()
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil return nil
} }
return results, TraverseFilesInZip(archivePath, visitor, paths...) return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
} }
// UnzipToDir extracts a zip archive to a target directory. // UnzipToDir extracts a zip archive to a target directory.
func UnzipToDir(archivePath, targetDir string) error { func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
visitor := func(file *zip.File) error { visitor := func(ctx context.Context, file archives.FileInfo) error {
joinedPath, err := safeJoin(targetDir, file.Name) joinedPath, err := safeJoin(targetDir, file.NameInArchive)
if err != nil { if err != nil {
return err return err
} }
@ -164,7 +160,7 @@ func UnzipToDir(archivePath, targetDir string) error {
return extractSingleFile(file, joinedPath, archivePath) return extractSingleFile(file, joinedPath, archivePath)
} }
return TraverseFilesInZip(archivePath, visitor) return TraverseFilesInZip(ctx, archivePath, visitor)
} }
// safeJoin ensures that any destinations do not resolve to a path above the prefix path. // safeJoin ensures that any destinations do not resolve to a path above the prefix path.
@ -181,13 +177,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
return joinResult, nil return joinResult, nil
} }
func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error { func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() { if file.IsDir() {
err = os.MkdirAll(expandedFilePath, file.Mode()) err = os.MkdirAll(expandedFilePath, file.Mode())
if err != nil { if err != nil {
return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err) return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
@ -202,20 +203,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
if err != nil { if err != nil {
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
} }
defer func() {
if err := outputFile.Close(); err != nil {
log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
}
}()
if err := safeCopy(outputFile, zippedFile); err != nil { if err := safeCopy(outputFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
}
err = outputFile.Close()
if err != nil {
return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
} }
} }
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil return nil
} }

View File

@ -4,6 +4,7 @@
package file package file
import ( import (
"context"
"crypto/sha256" "crypto/sha256"
"encoding/json" "encoding/json"
"errors" "errors"
@ -55,7 +56,7 @@ func TestUnzipToDir(t *testing.T) {
expectedPaths := len(expectedZipArchiveEntries) expectedPaths := len(expectedZipArchiveEntries)
observedPaths := 0 observedPaths := 0
err = UnzipToDir(archiveFilePath, unzipDestinationDir) err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
if err != nil { if err != nil {
t.Fatalf("unable to unzip archive: %+v", err) t.Fatalf("unable to unzip archive: %+v", err)
} }
@ -145,7 +146,7 @@ func TestContentsFromZip(t *testing.T) {
paths = append(paths, p) paths = append(paths, p)
} }
actual, err := ContentsFromZip(archivePath, paths...) actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }

View File

@ -1,229 +0,0 @@
package file
import (
"archive/zip"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/anchore/syft/internal/log"
)
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
// Sizes and signatures of the zip directory-end records parsed in this file.
const (
// directoryEndLen is the fixed length of the directory-end record that findSignatureInBlock searches for
// (the record's trailing comment is not included).
directoryEndLen = 22
// directory64LocLen is the number of bytes findDirectory64End reads for the zip64 locator.
directory64LocLen = 20
// directory64EndLen is the number of bytes readDirectory64End reads for the zip64 directory-end record.
directory64EndLen = 56
// directory64LocSignature is the value the first four bytes of the zip64 locator must hold.
directory64LocSignature = 0x07064b50
// directory64EndSignature is the value the first four bytes of the zip64 directory-end record must hold.
directory64EndSignature = 0x06064b50
)
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
//
// Values are created by OpenZip; calling Close releases the underlying file.
type ZipReadCloser struct {
// Reader is set by OpenZip to a zip reader over the section of the file that starts at the detected archive offset.
*zip.Reader
// Closer is set by OpenZip to the opened file.
io.Closer
}
// OpenZip provides a ZipReadCloser for the given filepath.
//
// The archive may have arbitrary bytes prepended to it (such as with self executing JARs); the start of the
// archive is located first and the returned reader only covers the bytes from that offset onward.
//
// On success the caller owns the returned ZipReadCloser and must Close it to release the underlying file.
// On any failure the file is closed before returning, so no handle is leaked.
func OpenZip(filepath string) (*ZipReadCloser, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}

	r, err := zipReaderFromFile(f, filepath)
	if err != nil {
		// the original failure is what matters to the caller; a close error here adds nothing actionable
		_ = f.Close()
		return nil, err
	}

	return &ZipReadCloser{
		Reader: r,
		Closer: f,
	}, nil
}

// zipReaderFromFile builds a zip.Reader over the portion of f that starts at the detected archive offset.
// It never closes f; filepath is only used for log messages.
func zipReaderFromFile(f *os.File, filepath string) (*zip.Reader, error) {
	fi, err := f.Stat()
	if err != nil {
		return nil, err
	}

	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
	// need to find the start of the archive and keep track of this offset.
	offset, err := findArchiveStartOffset(f, fi.Size())
	if err != nil {
		log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
		return nil, err
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
	}

	// the offset is computed with unsigned arithmetic, so reject values that cannot be represented as an int64
	if offset > math.MaxInt64 {
		return nil, fmt.Errorf("archive start offset too large: %v", offset)
	}
	offset64 := int64(offset)

	size := fi.Size() - offset64

	r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
	if err != nil {
		log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
		return nil, err
	}

	return r, nil
}
// readBuf is a cursor over a byte slice: each accessor decodes a little-endian value from the
// front of the slice and then advances the slice past the bytes it consumed.
type readBuf []byte

// uint16 decodes the leading two bytes and drops them from the buffer.
func (b *readBuf) uint16() uint16 {
	cur := *b
	val := binary.LittleEndian.Uint16(cur)
	*b = cur[2:]
	return val
}

// uint32 decodes the leading four bytes and drops them from the buffer.
func (b *readBuf) uint32() uint32 {
	cur := *b
	val := binary.LittleEndian.Uint32(cur)
	*b = cur[4:]
	return val
}

// uint64 decodes the leading eight bytes and drops them from the buffer.
func (b *readBuf) uint64() uint64 {
	cur := *b
	val := binary.LittleEndian.Uint64(cur)
	*b = cur[8:]
	return val
}
// directoryEnd holds the fields parsed from a zip directory-end record. findArchiveStartOffset fills it from
// the regular record and readDirectory64End overwrites every field with the zip64 values when that record is used.
type directoryEnd struct {
diskNbr uint32 // unused
dirDiskNbr uint32 // unused
dirRecordsThisDisk uint64 // unused
// total number of entries in the central directory
directoryRecords uint64
// size of the central directory
directorySize uint64
directoryOffset uint64 // relative to file
}
// findArchiveStartOffset locates the directory-end record near the end of r (which holds size bytes) and derives
// from it the offset at which the zip archive begins; a non-zero result means extra bytes precede the archive.
//
// It returns zip.ErrFormat when no directory-end signature is found or when the directory offset does not fall
// inside the file, and it returns any read error other than io.EOF encountered while scanning.
//
// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var buf []byte
var directoryEndOffset int64
for i, bLen := range []int64{1024, 65 * 1024} {
// never read more than the file holds
if bLen > size {
bLen = size
}
buf = make([]byte, int(bLen))
if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
return 0, err
}
if p := findSignatureInBlock(buf); p >= 0 {
// keep only the bytes from the signature onward and remember where the record sits in the file
buf = buf[p:]
directoryEndOffset = size - bLen + int64(p)
break
}
// give up once the larger window has been searched or the window already covered the whole file
if i == 1 || bLen == size {
return 0, zip.ErrFormat
}
}
// NOTE(review): buf is assigned via make on every iteration above, so this branch appears unreachable — confirm.
if buf == nil {
// we were unable to find the directoryEndSignature block
return 0, zip.ErrFormat
}
// read header into struct
b := readBuf(buf[4:]) // skip signature
d := &directoryEnd{
diskNbr: uint32(b.uint16()),
dirDiskNbr: uint32(b.uint16()),
dirRecordsThisDisk: uint64(b.uint16()),
directoryRecords: uint64(b.uint16()),
directorySize: uint64(b.uint32()),
directoryOffset: uint64(b.uint32()),
}
// Calculate where the zip data actually begins
// These values mean that the file can be a zip64 file
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
p, err := findDirectory64End(r, directoryEndOffset)
// when a zip64 locator is found, the zip64 record replaces both the record offset and the values in d
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End(r, p, d)
}
if err != nil {
return 0, err
}
}
// whatever lies between the start of the file and (record offset - directory size - directory offset) is
// treated as data prepended to the archive.
// NOTE(review): this is unsigned arithmetic, so inconsistent directory values would wrap around rather than
// go negative; OpenZip rejects results above math.MaxInt64.
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
// Make sure directoryOffset points to somewhere in our file.
if d.directoryOffset >= uint64(size) {
return 0, zip.ErrFormat
}
return startOfArchive, nil
}
// findDirectory64End looks for the zip64 locator that sits immediately before the directory end
// record at directoryEndOffset. When a valid locator is present it returns the offset of the zip64
// directory end record; otherwise it returns -1. A non-nil error is only returned when reading
// from r fails.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	const notFound = -1

	start := directoryEndOffset - directory64LocLen
	if start < 0 {
		// no need to look for a header outside the file
		return notFound, nil
	}

	raw := make([]byte, directory64LocLen)
	if _, readErr := r.ReadAt(raw, start); readErr != nil {
		return notFound, readErr
	}

	// decode every locator field up front, then validate them together
	cur := readBuf(raw)
	signature := cur.uint32()
	startDisk := cur.uint32()  // number of the disk with the start of the zip64 end of central directory
	dir64End := cur.uint64()   // relative offset of the zip64 end of central directory record
	totalDisks := cur.uint32() // total number of disks

	switch {
	case signature != directory64LocSignature:
		return notFound, nil
	case startDisk != 0:
		// the file is not a valid zip64-file
		return notFound, nil
	case totalDisks != 1:
		// the file is not a valid zip64-file
		return notFound, nil
	}
	return int64(dir64End), nil
}
// readDirectory64End reads the zip64 directory end record located at offset within r and replaces
// every field of d with the values found there. It fails when the record cannot be read or when
// it does not start with the zip64 directory end signature.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	record := make([]byte, directory64EndLen)
	if _, err = r.ReadAt(record, offset); err != nil {
		return err
	}

	cur := readBuf(record)
	if cur.uint32() != directory64EndSignature {
		return errors.New("could not read directory64End")
	}

	// skip dir size, version and version needed (uint64 + 2x uint16)
	cur = cur[12:]

	// the reads below are evaluated top to bottom, matching the field order of the record
	*d = directoryEnd{
		diskNbr:            cur.uint32(), // number of this disk
		dirDiskNbr:         cur.uint32(), // number of the disk with the start of the central directory
		dirRecordsThisDisk: cur.uint64(), // total number of entries in the central directory on this disk
		directoryRecords:   cur.uint64(), // total number of entries in the central directory
		directorySize:      cur.uint64(), // size of the central directory
		directoryOffset:    cur.uint64(), // offset of start of central directory with respect to the starting disk number
	}
	return nil
}
// findSignatureInBlock scans b from the end toward the start for a directory end signature
// ('P', 'K', 0x05, 0x06) whose record, including its trailing comment, fits inside b. It returns
// the index of the signature, or -1 when there is no such record.
func findSignatureInBlock(b []byte) int {
	total := len(b)
	for pos := total - directoryEndLen; pos >= 0; pos-- {
		// defined from directoryEndSignature
		if b[pos] != 'P' || b[pos+1] != 'K' || b[pos+2] != 0x05 || b[pos+3] != 0x06 {
			continue
		}

		// the last two bytes of the fixed-size record hold the comment length (little-endian)
		record := b[pos : pos+directoryEndLen]
		commentLen := int(record[directoryEndLen-2]) | int(record[directoryEndLen-1])<<8
		if pos+directoryEndLen+commentLen > total {
			// the declared comment would run past the block, so this is not a usable record
			continue
		}
		return pos
	}
	return -1
}

View File

@ -1,50 +0,0 @@
//go:build !windows
// +build !windows
package file
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
)
// TestFindArchiveStartOffset checks the offset reported by
// findArchiveStartOffset for a plain zip fixture (expected 0) and for a
// fixture with a 36 byte string placed in front of it (expected 36).
func TestFindArchiveStartOffset(t *testing.T) {
	tests := []struct {
		name        string
		archivePrep func(tb testing.TB) string
		expected    uint64
	}{
		{
			name:        "standard, non-nested zip",
			archivePrep: prepZipSourceFixture,
			expected:    0,
		},
		{
			name:        "zip with prepended bytes",
			archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
			expected:    36,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			archivePath := test.archivePrep(t)

			f, err := os.Open(archivePath)
			if err != nil {
				t.Fatalf("could not open archive %q: %+v", archivePath, err)
			}
			// release the handle once the subtest is done; t.Fatalf unwinds
			// through deferred calls, so this also covers the failure paths
			defer f.Close()

			// stat the open handle rather than looking the path up a second time
			fi, err := f.Stat()
			if err != nil {
				t.Fatalf("unable to stat archive: %+v", err)
			}

			actual, err := findArchiveStartOffset(f, fi.Size())
			if err != nil {
				t.Fatalf("unable to find offset: %+v", err)
			}
			assert.Equal(t, test.expected, actual)
		})
	}
}

View File

@ -4,13 +4,13 @@ import (
"context" "context"
"strings" "strings"
"github.com/anchore/archiver/v3"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/mholt/archives"
) )
const unknownsLabelerTaskName = "unknowns-labeler" const unknownsLabelerTaskName = "unknowns-labeler"
@ -57,9 +57,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
} }
if c.IncludeUnexpandedArchives { if c.IncludeUnexpandedArchives {
ctx := context.Background()
for coords := range s.Artifacts.FileMetadata { for coords := range s.Artifacts.FileMetadata {
unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath) format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) { if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
} }
} }

View File

@ -1,16 +1,17 @@
package model package model
import ( import (
"context"
"fmt" "fmt"
"strings" "strings"
"time" "time"
"github.com/anchore/archiver/v3"
"github.com/anchore/packageurl-go" "github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/mholt/archives"
) )
// ToGithubModel converts the provided SBOM to a GitHub dependency model // ToGithubModel converts the provided SBOM to a GitHub dependency model
@ -145,8 +146,8 @@ func trimRelative(s string) string {
// isArchive returns true if the path appears to be an archive // isArchive returns true if the path appears to be an archive
func isArchive(path string) bool { func isArchive(path string) bool {
_, err := archiver.ByExtension(path) format, _, err := archives.Identify(context.Background(), path, nil)
return err == nil return err == nil && format != nil
} }
func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {

View File

@ -110,7 +110,7 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
} }
fileManifest, err := intFile.NewZipFileManifest(archivePath) fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil { if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
} }
@ -228,7 +228,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
} }
// fetch the manifest file // fetch the manifest file
contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...) contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err) return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
} }
@ -387,8 +387,8 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro
var pomProperties pkg.JavaPomProperties var pomProperties pkg.JavaPomProperties
// Find the pom.properties/pom.xml if the names seem like a plausible match // Find the pom.properties/pom.xml if the names seem like a plausible match
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
// map of all the artifacts in the pom properties, in order to check exact match with the filename // map of all the artifacts in the pom properties, in order to check exact match with the filename
artifactsMap := make(map[string]bool) artifactsMap := make(map[string]bool)
@ -453,13 +453,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
var pkgs []pkg.Package var pkgs []pkg.Package
// pom.properties // pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
// pom.xml // pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -505,7 +505,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
} }
if len(licenseMatches) > 0 { if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...) contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
} }
@ -533,7 +533,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
// associating each discovered package to the given parent package. // associating each discovered package to the given parent package.
func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents // search and parse pom.properties files & fetch the contents
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
} }
@ -597,8 +597,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
return nestedPkgs, nestedRelationships, nil return nestedPkgs, nestedRelationships, nil
} }
func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
} }
@ -626,8 +626,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
return propertiesByParentPath, nil return propertiesByParentPath, nil
} }
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
} }

View File

@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
} }
func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
} }

View File

@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central // functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib // header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
// or archiver). // or archiver).
fileManifest, err := intFile.NewZipFileManifest(archivePath) fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err) return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"crypto" "crypto"
"fmt" "fmt"
"io"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
@ -11,7 +12,6 @@ import (
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
"github.com/anchore/archiver/v3"
stereoFile "github.com/anchore/stereoscope/pkg/file" stereoFile "github.com/anchore/stereoscope/pkg/file"
intFile "github.com/anchore/syft/internal/file" intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
@ -21,6 +21,7 @@ import (
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/anchore/syft/syft/source/directorysource" "github.com/anchore/syft/syft/source/directorysource"
"github.com/anchore/syft/syft/source/internal" "github.com/anchore/syft/syft/source/internal"
"github.com/mholt/archives"
) )
var _ source.Source = (*fileSource)(nil) var _ source.Source = (*fileSource)(nil)
@ -223,15 +224,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
// unarchived. // unarchived.
envelopedUnarchiver, err := archiver.ByExtension(path) envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
if tar, ok := unarchiver.(*archiver.Tar); ok {
// when tar files are extracted, if there are multiple entries at the same
// location, the last entry wins
// NOTE: this currently does not display any messages if an overwrite happens
tar.OverwriteExisting = true
}
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@ -256,15 +250,52 @@ func digestOfFileContents(path string) string {
return di.String() return di.String()
} }
func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) { func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
archive, err := os.Open(path)
if err != nil {
fmt.Errorf("unable to open archive: %v", err)
}
defer archive.Close()
tempDir, err := os.MkdirTemp("", "syft-archive-contents-") tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
if err != nil { if err != nil {
return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err) return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
} }
cleanupFn := func() error { visitor := func(ctx context.Context, file archives.FileInfo) error {
return os.RemoveAll(tempDir) destPath := filepath.Join(tempDir, file.NameInArchive)
if file.IsDir() {
return os.MkdirAll(destPath, file.Mode())
}
if err := os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
return fmt.Errorf("failed to create parent directory: %w", err)
}
rc, err := file.Open()
if err != nil {
return fmt.Errorf("failed to open file in archive: %w", err)
}
defer rc.Close()
destFile, err := os.Create(destPath)
if err != nil {
return fmt.Errorf("failed to create file in destination: %w", err)
}
defer destFile.Close()
if err := destFile.Chmod(file.Mode()); err != nil {
return fmt.Errorf("failed to change mode of destination file: %w", err)
}
if _, err := io.Copy(destFile, rc); err != nil {
return fmt.Errorf("failed to copy file contents: %w", err)
}
return nil
} }
return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) return tempDir, func() error {
return os.RemoveAll(tempDir)
}, unarchiver.Extract(context.Background(), archive, visitor)
} }