chore: migrate syft to use mholt/archives instead of anchore fork (#4029)

---------
Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>
Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Kudryavcev Nikolay 2025-11-14 02:04:43 +03:00 committed by GitHub
parent 4a60c41f38
commit 89842bd2f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 720 additions and 415 deletions

View File

@ -106,8 +106,8 @@ syft <image> -o <format>
Where the `formats` available are: Where the `formats` available are:
- `syft-json`: Use this to get as much information out of Syft as possible! - `syft-json`: Use this to get as much information out of Syft as possible!
- `syft-text`: A row-oriented, human-and-machine-friendly output. - `syft-text`: A row-oriented, human-and-machine-friendly output.
- `cyclonedx-xml`: A XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-xml`: An XML report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-xml@1.5`: A XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-xml@1.5`: An XML report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json`: A JSON report conforming to the [CycloneDX 1.6 specification](https://cyclonedx.org/specification/overview/).
- `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/). - `cyclonedx-json@1.5`: A JSON report conforming to the [CycloneDX 1.5 specification](https://cyclonedx.org/specification/overview/).
- `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/). - `spdx-tag-value`: A tag-value formatted report conforming to the [SPDX 2.3 specification](https://spdx.github.io/spdx-spec/v2.3/).

9
go.mod
View File

@ -11,7 +11,6 @@ require (
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/acobaugh/osrelease v0.1.0 github.com/acobaugh/osrelease v0.1.0
github.com/adrg/xdg v0.5.3 github.com/adrg/xdg v0.5.3
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2 github.com/anchore/fangs v0.0.0-20250319222917-446a1e748ec2
@ -168,7 +167,6 @@ require (
github.com/goccy/go-yaml v1.18.0 github.com/goccy/go-yaml v1.18.0
github.com/gogo/protobuf v1.3.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e // indirect
github.com/google/s2a-go v0.1.8 // indirect github.com/google/s2a-go v0.1.8 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
@ -209,10 +207,6 @@ require (
github.com/muesli/cancelreader v0.2.2 // indirect github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
github.com/olekukonko/errors v1.1.0 // indirect
github.com/olekukonko/ll v0.1.2 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/opencontainers/selinux v1.13.0 // indirect github.com/opencontainers/selinux v1.13.0 // indirect
@ -319,7 +313,10 @@ require (
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/nwaples/rardecode/v2 v2.2.0 // indirect
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 // indirect
github.com/olekukonko/errors v1.1.0 // indirect
github.com/olekukonko/ll v0.1.2 // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
gonum.org/v1/gonum v0.15.1 // indirect gonum.org/v1/gonum v0.15.1 // indirect
) )

6
go.sum
View File

@ -110,8 +110,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 h1:p0ZIe0htYOX284Y4axJaGBvXHU0VCCzLN5Wf5XbKStU=
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw= github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9/go.mod h1:3ZsFB9tzW3vl4gEiUeuSOMDnwroWxIxJelOOHUp8dSw=
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA= github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 h1:2sIdYJlQESEnyk3Y0WD2vXWW5eD2iMz9Ev8fj1Z8LNA=
@ -479,8 +477,6 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -755,8 +751,6 @@ github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1 h1:kpt9ZfKcm+
github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0= github.com/nix-community/go-nix v0.0.0-20250101154619-4bdde671e0a1/go.mod h1:qgCw4bBKZX8qMgGeEZzGFVT3notl42dBjNqO2jut0M0=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A= github.com/nwaples/rardecode/v2 v2.2.0 h1:4ufPGHiNe1rYJxYfehALLjup4Ls3ck42CWwjKiOqu0A=
github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw= github.com/nwaples/rardecode/v2 v2.2.0/go.mod h1:7uz379lSxPe6j9nvzxUZ+n7mnJNgjsRNb6IbvGVHRmw=
github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc= github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc=

View File

@ -1,17 +1,40 @@
package file package file
import ( import (
"context"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"github.com/bmatcuk/doublestar/v4" "github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives"
"github.com/anchore/archiver/v3" "github.com/anchore/syft/internal"
) )
// TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
tarReader, err := os.Open(archivePath)
if err != nil {
return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
}
defer internal.CloseAndLogError(tarReader, archivePath)
format, _, err := archives.Identify(ctx, archivePath, nil)
if err != nil {
return fmt.Errorf("failed to identify tar compression format: %w", err)
}
extractor, ok := format.(archives.Extractor)
if !ok {
return fmt.Errorf("file format does not support extraction: %s", archivePath)
}
return extractor.Extract(ctx, tarReader, visitor)
}
// ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted. // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...string) (map[string]Opener, error) { func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
results := make(map[string]Opener) results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -19,9 +42,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return results, nil return results, nil
} }
visitor := func(file archiver.File) error { visitor := func(_ context.Context, file archives.FileInfo) error {
defer file.Close()
// ignore directories // ignore directories
if file.IsDir() { if file.IsDir() {
return nil return nil
@ -43,7 +64,13 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
// provides a ReadCloser. It is up to the caller to handle closing the file explicitly. // provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
defer tempFile.Close() defer tempFile.Close()
if err := safeCopy(tempFile, file.ReadCloser); err != nil { packedFile, err := file.Open()
if err != nil {
return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
}
defer internal.CloseAndLogError(packedFile, archivePath)
if err := safeCopy(tempFile, packedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
} }
@ -52,7 +79,7 @@ func ExtractGlobsFromTarToUniqueTempFile(archivePath, dir string, globs ...strin
return nil return nil
} }
return results, archiver.Walk(archivePath, visitor) return results, TraverseFilesInTar(ctx, archivePath, visitor)
} }
func matchesAnyGlob(name string, globs ...string) bool { func matchesAnyGlob(name string, globs ...string) bool {

View File

@ -1,10 +1,12 @@
package file package file
import ( import (
"context"
"os" "os"
"sort" "sort"
"strings" "strings"
"github.com/mholt/archives"
"github.com/scylladb/go-set/strset" "github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
@ -14,22 +16,25 @@ import (
type ZipFileManifest map[string]os.FileInfo type ZipFileManifest map[string]os.FileInfo
// NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path. // NewZipFileManifest creates and returns a new ZipFileManifest populated with path and metadata from the given zip archive path.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) { func NewZipFileManifest(ctx context.Context, archivePath string) (ZipFileManifest, error) {
zipReader, err := OpenZip(archivePath) zipReader, err := os.Open(archivePath)
manifest := make(ZipFileManifest) manifest := make(ZipFileManifest)
if err != nil { if err != nil {
log.Debugf("unable to open zip archive (%s): %v", archivePath, err) log.Debugf("unable to open zip archive (%s): %v", archivePath, err)
return manifest, err return manifest, err
} }
defer func() { defer func() {
err = zipReader.Close() if err = zipReader.Close(); err != nil {
if err != nil {
log.Debugf("unable to close zip archive (%s): %+v", archivePath, err) log.Debugf("unable to close zip archive (%s): %+v", archivePath, err)
} }
}() }()
for _, file := range zipReader.File { err = archives.Zip{}.Extract(ctx, zipReader, func(_ context.Context, file archives.FileInfo) error {
manifest.Add(file.Name, file.FileInfo()) manifest.Add(file.NameInArchive, file.FileInfo)
return nil
})
if err != nil {
return manifest, err
} }
return manifest, nil return manifest, nil
} }

View File

@ -4,6 +4,7 @@
package file package file
import ( import (
"context"
"encoding/json" "encoding/json"
"os" "os"
"path" "path"
@ -24,7 +25,7 @@ func TestNewZipFileManifest(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false) archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
actual, err := NewZipFileManifest(archiveFilePath) actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }
@ -59,7 +60,7 @@ func TestNewZip64FileManifest(t *testing.T) {
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source") sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
archiveFilePath := setupZipFileTest(t, sourceDirPath, true) archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
actual, err := NewZipFileManifest(archiveFilePath) actual, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }
@ -99,7 +100,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
archiveFilePath := setupZipFileTest(t, sourceDirPath, false) archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
z, err := NewZipFileManifest(archiveFilePath) z, err := NewZipFileManifest(context.Background(), archiveFilePath)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }

View File

@ -1,13 +1,15 @@
package file package file
import ( import (
"archive/zip"
"bytes" "bytes"
"context"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/mholt/archives"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
) )
@ -25,7 +27,7 @@ type errZipSlipDetected struct {
} }
func (e *errZipSlipDetected) Error() string { func (e *errZipSlipDetected) Error() string {
return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
} }
type zipTraversalRequest map[string]struct{} type zipTraversalRequest map[string]struct{}
@ -39,38 +41,34 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
} }
// TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern. // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error { func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
request := newZipTraverseRequest(paths...) request := newZipTraverseRequest(paths...)
zipReader, err := OpenZip(archivePath) zipReader, err := os.Open(archivePath)
if err != nil { if err != nil {
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err) return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
} }
defer func() { defer func() {
err = zipReader.Close() if err := zipReader.Close(); err != nil {
if err != nil {
log.Errorf("unable to close zip archive (%s): %+v", archivePath, err) log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
} }
}() }()
for _, file := range zipReader.File { return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
// if no paths are given then assume that all files should be traversed // if no paths are given then assume that all files should be traversed
if len(paths) > 0 { if len(paths) > 0 {
if _, ok := request[file.Name]; !ok { if _, ok := request[file.NameInArchive]; !ok {
// this file path is not of interest // this file path is not of interest
continue return nil
} }
} }
if err = visitor(file); err != nil { return visitor(ctx, file)
return err })
}
}
return nil
} }
// ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted. // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
results := make(map[string]Opener) results := make(map[string]Opener)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -78,9 +76,8 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
return results, nil return results, nil
} }
visitor := func(file *zip.File) error { visitor := func(_ context.Context, file archives.FileInfo) error {
tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-" tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
tempFile, err := os.CreateTemp(dir, tempfilePrefix) tempFile, err := os.CreateTemp(dir, tempfilePrefix)
if err != nil { if err != nil {
return fmt.Errorf("unable to create temp file: %w", err) return fmt.Errorf("unable to create temp file: %w", err)
@ -92,33 +89,32 @@ func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (m
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() { defer func() {
err := zippedFile.Close() if err := zippedFile.Close(); err != nil {
if err != nil { log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
} }
}() }()
if file.FileInfo().IsDir() { if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name) return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
} }
if err := safeCopy(tempFile, zippedFile); err != nil { if err := safeCopy(tempFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
} }
results[file.Name] = Opener{path: tempFile.Name()} results[file.NameInArchive] = Opener{path: tempFile.Name()}
return nil return nil
} }
return results, TraverseFilesInZip(archivePath, visitor, paths...) return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
} }
// ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path. // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) { func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
results := make(map[string]string) results := make(map[string]string)
// don't allow for full traversal, only select traversal from given paths // don't allow for full traversal, only select traversal from given paths
@ -126,37 +122,38 @@ func ContentsFromZip(archivePath string, paths ...string) (map[string]string, er
return results, nil return results, nil
} }
visitor := func(file *zip.File) error { visitor := func(_ context.Context, file archives.FileInfo) error {
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() { if file.IsDir() {
return fmt.Errorf("unable to extract directories, only files: %s", file.Name) return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
} }
var buffer bytes.Buffer var buffer bytes.Buffer
if err := safeCopy(&buffer, zippedFile); err != nil { if err := safeCopy(&buffer, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
} }
results[file.Name] = buffer.String() results[file.NameInArchive] = buffer.String()
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil return nil
} }
return results, TraverseFilesInZip(archivePath, visitor, paths...) return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
} }
// UnzipToDir extracts a zip archive to a target directory. // UnzipToDir extracts a zip archive to a target directory.
func UnzipToDir(archivePath, targetDir string) error { func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
visitor := func(file *zip.File) error { visitor := func(_ context.Context, file archives.FileInfo) error {
joinedPath, err := safeJoin(targetDir, file.Name) joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
if err != nil { if err != nil {
return err return err
} }
@ -164,11 +161,11 @@ func UnzipToDir(archivePath, targetDir string) error {
return extractSingleFile(file, joinedPath, archivePath) return extractSingleFile(file, joinedPath, archivePath)
} }
return TraverseFilesInZip(archivePath, visitor) return TraverseFilesInZip(ctx, archivePath, visitor)
} }
// safeJoin ensures that any destinations do not resolve to a path above the prefix path. // SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
func safeJoin(prefix string, dest ...string) (string, error) { func SafeJoin(prefix string, dest ...string) (string, error) {
joinResult := filepath.Join(append([]string{prefix}, dest...)...) joinResult := filepath.Join(append([]string{prefix}, dest...)...)
cleanJoinResult := filepath.Clean(joinResult) cleanJoinResult := filepath.Clean(joinResult)
if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) { if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
@ -181,13 +178,18 @@ func safeJoin(prefix string, dest ...string) (string, error) {
return joinResult, nil return joinResult, nil
} }
func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error { func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
zippedFile, err := file.Open() zippedFile, err := file.Open()
if err != nil { if err != nil {
return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
} }
defer func() {
if err := zippedFile.Close(); err != nil {
log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
}
}()
if file.FileInfo().IsDir() { if file.IsDir() {
err = os.MkdirAll(expandedFilePath, file.Mode()) err = os.MkdirAll(expandedFilePath, file.Mode())
if err != nil { if err != nil {
return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err) return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
@ -202,20 +204,16 @@ func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) err
if err != nil { if err != nil {
return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
} }
defer func() {
if err := outputFile.Close(); err != nil {
log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
}
}()
if err := safeCopy(outputFile, zippedFile); err != nil { if err := safeCopy(outputFile, zippedFile); err != nil {
return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
}
err = outputFile.Close()
if err != nil {
return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
} }
} }
err = zippedFile.Close()
if err != nil {
return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
}
return nil return nil
} }

View File

@ -4,6 +4,8 @@
package file package file
import ( import (
"archive/zip"
"context"
"crypto/sha256" "crypto/sha256"
"encoding/json" "encoding/json"
"errors" "errors"
@ -17,6 +19,7 @@ import (
"github.com/go-test/deep" "github.com/go-test/deep"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func equal(r1, r2 io.Reader) (bool, error) { func equal(r1, r2 io.Reader) (bool, error) {
@ -55,7 +58,7 @@ func TestUnzipToDir(t *testing.T) {
expectedPaths := len(expectedZipArchiveEntries) expectedPaths := len(expectedZipArchiveEntries)
observedPaths := 0 observedPaths := 0
err = UnzipToDir(archiveFilePath, unzipDestinationDir) err = UnzipToDir(context.Background(), archiveFilePath, unzipDestinationDir)
if err != nil { if err != nil {
t.Fatalf("unable to unzip archive: %+v", err) t.Fatalf("unable to unzip archive: %+v", err)
} }
@ -145,7 +148,7 @@ func TestContentsFromZip(t *testing.T) {
paths = append(paths, p) paths = append(paths, p)
} }
actual, err := ContentsFromZip(archivePath, paths...) actual, err := ContentsFromZip(context.Background(), archivePath, paths...)
if err != nil { if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err) t.Fatalf("unable to extract from unzip archive: %+v", err)
} }
@ -307,9 +310,528 @@ func TestSafeJoin(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) { t.Run(fmt.Sprintf("%+v:%+v", test.prefix, test.args), func(t *testing.T) {
actual, err := safeJoin(test.prefix, test.args...) actual, err := SafeJoin(test.prefix, test.args...)
test.errAssertion(t, err) test.errAssertion(t, err)
assert.Equal(t, test.expected, actual) assert.Equal(t, test.expected, actual)
}) })
} }
} }
// TestSymlinkProtection demonstrates that SafeJoin protects against symlink-based
// directory traversal attacks by validating that archive entry paths cannot escape
// the extraction directory.
func TestSafeJoin_SymlinkProtection(t *testing.T) {
tests := []struct {
name string
archivePath string // Path as it would appear in the archive
expectError bool
description string
}{
{
name: "path traversal via ../",
archivePath: "../../../outside/file.txt",
expectError: true,
description: "Archive entry with ../ trying to escape extraction dir",
},
{
name: "absolute path symlink target",
archivePath: "../../../sensitive.txt",
expectError: true,
description: "Simulates symlink pointing outside via relative path",
},
{
name: "safe relative path within extraction dir",
archivePath: "subdir/safe.txt",
expectError: false,
description: "Normal file path that stays within extraction directory",
},
{
name: "safe path with internal ../",
archivePath: "dir1/../dir2/file.txt",
expectError: false,
description: "Path with ../ that still resolves within extraction dir",
},
{
name: "deeply nested traversal",
archivePath: "../../../../../../tmp/evil.txt",
expectError: true,
description: "Multiple levels of ../ trying to escape",
},
{
name: "single parent directory escape",
archivePath: "../",
expectError: true,
description: "Simple one-level escape attempt",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create temp directories to simulate extraction scenario
tmpDir := t.TempDir()
extractDir := filepath.Join(tmpDir, "extract")
outsideDir := filepath.Join(tmpDir, "outside")
require.NoError(t, os.MkdirAll(extractDir, 0755))
require.NoError(t, os.MkdirAll(outsideDir, 0755))
// Create a file outside extraction dir that an attacker might target
outsideFile := filepath.Join(outsideDir, "sensitive.txt")
require.NoError(t, os.WriteFile(outsideFile, []byte("sensitive data"), 0644))
// Test SafeJoin - this is what happens when processing archive entries
result, err := SafeJoin(extractDir, tt.archivePath)
if tt.expectError {
// Should block malicious paths
require.Error(t, err, "Expected SafeJoin to reject malicious path")
var zipSlipErr *errZipSlipDetected
assert.ErrorAs(t, err, &zipSlipErr, "Error should be errZipSlipDetected type")
assert.Empty(t, result, "Result should be empty for blocked paths")
} else {
// Should allow safe paths
require.NoError(t, err, "Expected SafeJoin to allow safe path")
assert.NotEmpty(t, result, "Result should not be empty for safe paths")
assert.True(t, strings.HasPrefix(filepath.Clean(result), filepath.Clean(extractDir)),
"Safe path should resolve within extraction directory")
}
})
}
}
// TestUnzipToDir_SymlinkAttacks tests UnzipToDir function with malicious ZIP archives
// containing symlink entries that attempt path traversal attacks.
//
// EXPECTED BEHAVIOR: UnzipToDir should either:
// 1. Detect and reject symlinks explicitly with a security error, OR
// 2. Extract them safely (library converts symlinks to regular files)
func TestUnzipToDir_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
fileName string
errContains string
}{
{
name: "direct symlink to outside directory",
symlinkName: "evil_link",
fileName: "evil_link/payload.txt",
errContains: "not a directory", // attempt to write through symlink leaf (which is not a directory)
},
{
name: "directory symlink attack",
symlinkName: "safe_dir/link",
fileName: "safe_dir/link/payload.txt",
errContains: "not a directory", // attempt to write through symlink (which is not a directory)
},
{
name: "symlink without payload file",
symlinkName: "standalone_link",
fileName: "", // no payload file
errContains: "", // no error expected, symlink without payload is safe
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
// create outside target directory
outsideDir := filepath.Join(tempDir, "outside_target")
require.NoError(t, os.MkdirAll(outsideDir, 0755))
// create extraction directory
extractDir := filepath.Join(tempDir, "extract")
require.NoError(t, os.MkdirAll(extractDir, 0755))
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, outsideDir, tt.fileName)
err := UnzipToDir(context.Background(), maliciousZip, extractDir)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
} else {
require.NoError(t, err)
}
analyzeExtractionDirectory(t, extractDir)
// check if payload file escaped extraction directory
if tt.fileName != "" {
maliciousFile := filepath.Join(outsideDir, filepath.Base(tt.fileName))
checkFileOutsideExtraction(t, maliciousFile)
}
// check if symlink was created pointing outside
symlinkPath := filepath.Join(extractDir, tt.symlinkName)
checkSymlinkCreation(t, symlinkPath, extractDir, outsideDir)
})
}
}
// TestContentsFromZip_SymlinkAttacks tests the ContentsFromZip function with malicious
// ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ContentsFromZip should either:
// 1. Reject symlinks explicitly, OR
// 2. Return empty content for symlinks (library behavior)
//
// Though ContentsFromZip doesn't write to disk, but if symlinks are followed, it could read sensitive
// files from outside the archive.
func TestContentsFromZip_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
symlinkTarget string
requestPath string
errContains string
}{
{
name: "request symlink entry directly",
symlinkName: "evil_link",
symlinkTarget: "/etc/hosts", // attempt to read sensitive file
requestPath: "evil_link",
errContains: "", // no error expected - library returns symlink metadata
},
{
name: "symlink in nested directory",
symlinkName: "nested/link",
symlinkTarget: "/etc/hosts",
requestPath: "nested/link",
errContains: "", // no error expected - library returns symlink metadata
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
// create malicious ZIP with symlink entry (no payload file needed)
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
contents, err := ContentsFromZip(context.Background(), maliciousZip, tt.requestPath)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
return
}
require.NoError(t, err)
// verify symlink handling - library should return symlink target as content (metadata)
content, found := contents[tt.requestPath]
require.True(t, found, "symlink entry should be found in results")
// verify symlink was NOT followed (content should be target path or empty)
if content != "" && content != tt.symlinkTarget {
// content is not empty and not the symlink target - check if actual file was read
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
if readErr == nil && string(targetContent) == content {
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was read!")
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
t.Logf(" content length: %d bytes", len(content))
}
}
}
})
}
}
// TestExtractFromZipToUniqueTempFile_SymlinkAttacks tests the ExtractFromZipToUniqueTempFile
// function with malicious ZIP archives containing symlink entries.
//
// EXPECTED BEHAVIOR: ExtractFromZipToUniqueTempFile should either:
// 1. Reject symlinks explicitly, OR
// 2. Extract them safely (library converts to empty files, filepath.Base sanitizes names)
//
// This function uses filepath.Base() on the archive entry name for temp file prefix and
// os.CreateTemp() which creates files in the specified directory, so it should be protected.
func TestExtractFromZipToUniqueTempFile_SymlinkAttacks(t *testing.T) {
tests := []struct {
name string
symlinkName string
symlinkTarget string
requestPath string
errContains string
}{
{
name: "extract symlink entry to temp file",
symlinkName: "evil_link",
symlinkTarget: "/etc/passwd",
requestPath: "evil_link",
errContains: "", // no error expected - library extracts symlink metadata
},
{
name: "extract nested symlink",
symlinkName: "nested/dir/link",
symlinkTarget: "/tmp/outside",
requestPath: "nested/dir/link",
errContains: "", // no error expected
},
{
name: "extract path traversal symlink name",
symlinkName: "../../escape",
symlinkTarget: "/tmp/outside",
requestPath: "../../escape",
errContains: "", // no error expected - filepath.Base sanitizes name
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tempDir := t.TempDir()
maliciousZip := createMaliciousZipWithSymlink(t, tempDir, tt.symlinkName, tt.symlinkTarget, "")
// create temp directory for extraction
extractTempDir := filepath.Join(tempDir, "temp_extract")
require.NoError(t, os.MkdirAll(extractTempDir, 0755))
openers, err := ExtractFromZipToUniqueTempFile(context.Background(), maliciousZip, extractTempDir, tt.requestPath)
// check error expectations
if tt.errContains != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errContains)
return
}
require.NoError(t, err)
// verify symlink was extracted
opener, found := openers[tt.requestPath]
require.True(t, found, "symlink entry should be extracted")
// verify temp file is within temp directory
tempFilePath := opener.path
cleanTempDir := filepath.Clean(extractTempDir)
cleanTempFile := filepath.Clean(tempFilePath)
require.True(t, strings.HasPrefix(cleanTempFile, cleanTempDir),
"temp file must be within temp directory: %s not in %s", cleanTempFile, cleanTempDir)
// verify symlink was NOT followed (content should be target path or empty)
f, openErr := opener.Open()
require.NoError(t, openErr)
defer f.Close()
content, readErr := io.ReadAll(f)
require.NoError(t, readErr)
// check if symlink was followed (content matches actual file)
if len(content) > 0 && string(content) != tt.symlinkTarget {
if _, statErr := os.Stat(tt.symlinkTarget); statErr == nil {
targetContent, readErr := os.ReadFile(tt.symlinkTarget)
if readErr == nil && string(targetContent) == string(content) {
t.Errorf("critical issue!... symlink was FOLLOWED and external file content was copied!")
t.Logf(" symlink: %s → %s", tt.requestPath, tt.symlinkTarget)
t.Logf(" content length: %d bytes", len(content))
}
}
}
})
}
}
// forensicFindings contains the results of analyzing an extraction directory
type forensicFindings struct {
symlinksFound []forensicSymlink
regularFiles []string
directories []string
symlinkVulnerabilities []string
}
type forensicSymlink struct {
path string
target string
escapesExtraction bool
resolvedPath string
}
// analyzeExtractionDirectory walks the extraction directory and detects symlinks that point
// outside the extraction directory. It is silent unless vulnerabilities are found.
func analyzeExtractionDirectory(t *testing.T, extractDir string) forensicFindings {
t.Helper()
findings := forensicFindings{}
filepath.Walk(extractDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
// only log if there's an error walking the directory
t.Logf("Error walking %s: %v", path, err)
return nil
}
relPath := strings.TrimPrefix(path, extractDir+"/")
if relPath == "" {
relPath = "."
}
// use Lstat to detect symlinks without following them
linfo, lerr := os.Lstat(path)
if lerr == nil && linfo.Mode()&os.ModeSymlink != 0 {
target, _ := os.Readlink(path)
// resolve to see where it actually points
var resolvedPath string
var escapesExtraction bool
if filepath.IsAbs(target) {
// absolute symlink
resolvedPath = target
cleanExtractDir := filepath.Clean(extractDir)
escapesExtraction = !strings.HasPrefix(filepath.Clean(target), cleanExtractDir)
if escapesExtraction {
t.Errorf("critical issue!... absolute symlink created: %s → %s", relPath, target)
t.Logf(" this symlink points outside the extraction directory")
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
fmt.Sprintf("absolute symlink: %s → %s", relPath, target))
}
} else {
// relative symlink - resolve it
resolvedPath = filepath.Join(filepath.Dir(path), target)
cleanResolved := filepath.Clean(resolvedPath)
cleanExtractDir := filepath.Clean(extractDir)
escapesExtraction = !strings.HasPrefix(cleanResolved, cleanExtractDir)
if escapesExtraction {
t.Errorf("critical issue!... symlink escapes extraction dir: %s → %s", relPath, target)
t.Logf(" symlink resolves to: %s (outside extraction directory)", cleanResolved)
findings.symlinkVulnerabilities = append(findings.symlinkVulnerabilities,
fmt.Sprintf("relative symlink escape: %s → %s (resolves to %s)", relPath, target, cleanResolved))
}
}
findings.symlinksFound = append(findings.symlinksFound, forensicSymlink{
path: relPath,
target: target,
escapesExtraction: escapesExtraction,
resolvedPath: resolvedPath,
})
} else {
// regular file or directory - collect silently
if info.IsDir() {
findings.directories = append(findings.directories, relPath)
} else {
findings.regularFiles = append(findings.regularFiles, relPath)
}
}
return nil
})
return findings
}
// checkFileOutsideExtraction checks if a file was written outside the extraction directory.
// Returns true if the file exists (vulnerability), false otherwise. Silent on success.
func checkFileOutsideExtraction(t *testing.T, filePath string) bool {
t.Helper()
if stat, err := os.Stat(filePath); err == nil {
content, _ := os.ReadFile(filePath)
t.Errorf("critical issue!... file written OUTSIDE extraction directory!")
t.Logf(" location: %s", filePath)
t.Logf(" size: %d bytes", stat.Size())
t.Logf(" content: %s", string(content))
t.Logf(" ...this means an attacker can write files to arbitrary locations on the filesystem")
return true
}
// no file found outside extraction directory...
return false
}
// checkSymlinkCreation verifies if a symlink was created at the expected path and reports
// whether it points outside the extraction directory. Silent unless a symlink is found.
func checkSymlinkCreation(t *testing.T, symlinkPath, extractDir, expectedTarget string) bool {
t.Helper()
if linfo, err := os.Lstat(symlinkPath); err == nil {
if linfo.Mode()&os.ModeSymlink != 0 {
target, _ := os.Readlink(symlinkPath)
if expectedTarget != "" && target == expectedTarget {
t.Errorf("critical issue!... symlink pointing outside extraction dir was created!")
t.Logf(" Symlink: %s → %s", symlinkPath, target)
return true
}
// Check if it escapes even if target doesn't match expected
if filepath.IsAbs(target) {
cleanExtractDir := filepath.Clean(extractDir)
if !strings.HasPrefix(filepath.Clean(target), cleanExtractDir) {
t.Errorf("critical issue!... absolute symlink escapes extraction dir!")
t.Logf(" symlink: %s → %s", symlinkPath, target)
return true
}
}
}
// if it exists but is not a symlink, that's good (attack was thwarted)...
}
return false
}
// createMaliciousZipWithSymlink creates a ZIP archive containing a symlink entry pointing to an arbitrary target,
// followed by a file entry that attempts to write through that symlink.
// returns the path to the created ZIP archive.
func createMaliciousZipWithSymlink(t *testing.T, tempDir, symlinkName, symlinkTarget, fileName string) string {
t.Helper()
maliciousZip := filepath.Join(tempDir, "malicious.zip")
zipFile, err := os.Create(maliciousZip)
require.NoError(t, err)
defer zipFile.Close()
zw := zip.NewWriter(zipFile)
// create parent directories if the symlink is nested
if dir := filepath.Dir(symlinkName); dir != "." {
dirHeader := &zip.FileHeader{
Name: dir + "/",
Method: zip.Store,
}
dirHeader.SetMode(os.ModeDir | 0755)
_, err = zw.CreateHeader(dirHeader)
require.NoError(t, err)
}
// create symlink entry pointing outside extraction directory
// note: ZIP format stores symlinks as regular files with the target path as content
symlinkHeader := &zip.FileHeader{
Name: symlinkName,
Method: zip.Store,
}
symlinkHeader.SetMode(os.ModeSymlink | 0755)
symlinkWriter, err := zw.CreateHeader(symlinkHeader)
require.NoError(t, err)
// write the symlink target as the file content (this is how ZIP stores symlinks)
_, err = symlinkWriter.Write([]byte(symlinkTarget))
require.NoError(t, err)
// create file entry that will be written through the symlink
if fileName != "" {
payloadContent := []byte("MALICIOUS PAYLOAD - This should NOT be written outside extraction dir!")
payloadHeader := &zip.FileHeader{
Name: fileName,
Method: zip.Deflate,
}
payloadHeader.SetMode(0644)
payloadWriter, err := zw.CreateHeader(payloadHeader)
require.NoError(t, err)
_, err = payloadWriter.Write(payloadContent)
require.NoError(t, err)
}
require.NoError(t, zw.Close())
require.NoError(t, zipFile.Close())
return maliciousZip
}

View File

@ -1,229 +0,0 @@
package file
import (
"archive/zip"
"encoding/binary"
"errors"
"fmt"
"io"
"math"
"os"
"github.com/anchore/syft/internal/log"
)
// directoryEndLen, readByf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
const (
directoryEndLen = 22
directory64LocLen = 20
directory64EndLen = 56
directory64LocSignature = 0x07064b50
directory64EndSignature = 0x06064b50
)
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
type ZipReadCloser struct {
*zip.Reader
io.Closer
}
// OpenZip provides a ZipReadCloser for the given filepath.
func OpenZip(filepath string) (*ZipReadCloser, error) {
f, err := os.Open(filepath)
if err != nil {
return nil, err
}
fi, err := f.Stat()
if err != nil {
f.Close()
return nil, err
}
// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
// need to find the start of the archive and keep track of this offset.
offset, err := findArchiveStartOffset(f, fi.Size())
if err != nil {
log.Debugf("cannot find beginning of zip archive=%q : %v", filepath, err)
return nil, err
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
}
if offset > math.MaxInt64 {
return nil, fmt.Errorf("archive start offset too large: %v", offset)
}
offset64 := int64(offset)
size := fi.Size() - offset64
r, err := zip.NewReader(io.NewSectionReader(f, offset64, size), size)
if err != nil {
log.Debugf("unable to open ZipReadCloser @ %q: %v", filepath, err)
return nil, err
}
return &ZipReadCloser{
Reader: r,
Closer: f,
}, nil
}
type readBuf []byte
func (b *readBuf) uint16() uint16 {
v := binary.LittleEndian.Uint16(*b)
*b = (*b)[2:]
return v
}
func (b *readBuf) uint32() uint32 {
v := binary.LittleEndian.Uint32(*b)
*b = (*b)[4:]
return v
}
func (b *readBuf) uint64() uint64 {
v := binary.LittleEndian.Uint64(*b)
*b = (*b)[8:]
return v
}
type directoryEnd struct {
diskNbr uint32 // unused
dirDiskNbr uint32 // unused
dirRecordsThisDisk uint64 // unused
directoryRecords uint64
directorySize uint64
directoryOffset uint64 // relative to file
}
// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var buf []byte
var directoryEndOffset int64
for i, bLen := range []int64{1024, 65 * 1024} {
if bLen > size {
bLen = size
}
buf = make([]byte, int(bLen))
if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
return 0, err
}
if p := findSignatureInBlock(buf); p >= 0 {
buf = buf[p:]
directoryEndOffset = size - bLen + int64(p)
break
}
if i == 1 || bLen == size {
return 0, zip.ErrFormat
}
}
if buf == nil {
// we were unable to find the directoryEndSignature block
return 0, zip.ErrFormat
}
// read header into struct
b := readBuf(buf[4:]) // skip signature
d := &directoryEnd{
diskNbr: uint32(b.uint16()),
dirDiskNbr: uint32(b.uint16()),
dirRecordsThisDisk: uint64(b.uint16()),
directoryRecords: uint64(b.uint16()),
directorySize: uint64(b.uint32()),
directoryOffset: uint64(b.uint32()),
}
// Calculate where the zip data actually begins
// These values mean that the file can be a zip64 file
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
p, err := findDirectory64End(r, directoryEndOffset)
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End(r, p, d)
}
if err != nil {
return 0, err
}
}
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
// Make sure directoryOffset points to somewhere in our file.
if d.directoryOffset >= uint64(size) {
return 0, zip.ErrFormat
}
return startOfArchive, nil
}
// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
locOffset := directoryEndOffset - directory64LocLen
if locOffset < 0 {
return -1, nil // no need to look for a header outside the file
}
buf := make([]byte, directory64LocLen)
if _, err := r.ReadAt(buf, locOffset); err != nil {
return -1, err
}
b := readBuf(buf)
if sig := b.uint32(); sig != directory64LocSignature {
return -1, nil
}
if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
return -1, nil // the file is not a valid zip64-file
}
p := b.uint64() // relative offset of the zip64 end of central directory record
if b.uint32() != 1 { // total number of disks
return -1, nil // the file is not a valid zip64-file
}
return int64(p), nil
}
// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
buf := make([]byte, directory64EndLen)
if _, err := r.ReadAt(buf, offset); err != nil {
return err
}
b := readBuf(buf)
if sig := b.uint32(); sig != directory64EndSignature {
return errors.New("could not read directory64End")
}
b = b[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
d.diskNbr = b.uint32() // number of this disk
d.dirDiskNbr = b.uint32() // number of the disk with the start of the central directory
d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
d.directoryRecords = b.uint64() // total number of entries in the central directory
d.directorySize = b.uint64() // size of the central directory
d.directoryOffset = b.uint64() // offset of start of central directory with respect to the starting disk number
return nil
}
func findSignatureInBlock(b []byte) int {
for i := len(b) - directoryEndLen; i >= 0; i-- {
// defined from directoryEndSignature
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
if n+directoryEndLen+i <= len(b) {
return i
}
}
}
return -1
}

View File

@ -1,50 +0,0 @@
//go:build !windows
// +build !windows
package file
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
)
func TestFindArchiveStartOffset(t *testing.T) {
tests := []struct {
name string
archivePrep func(tb testing.TB) string
expected uint64
}{
{
name: "standard, non-nested zip",
archivePrep: prepZipSourceFixture,
expected: 0,
},
{
name: "zip with prepended bytes",
archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
expected: 36,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
archivePath := test.archivePrep(t)
f, err := os.Open(archivePath)
if err != nil {
t.Fatalf("could not open archive %q: %+v", archivePath, err)
}
fi, err := os.Stat(f.Name())
if err != nil {
t.Fatalf("unable to stat archive: %+v", err)
}
actual, err := findArchiveStartOffset(f, fi.Size())
if err != nil {
t.Fatalf("unable to find offset: %+v", err)
}
assert.Equal(t, test.expected, actual)
})
}
}

View File

@ -4,7 +4,8 @@ import (
"context" "context"
"strings" "strings"
"github.com/anchore/archiver/v3" "github.com/mholt/archives"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/cataloging"
@ -57,9 +58,10 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
} }
if c.IncludeUnexpandedArchives { if c.IncludeUnexpandedArchives {
ctx := context.Background()
for coords := range s.Artifacts.FileMetadata { for coords := range s.Artifacts.FileMetadata {
unarchiver, notArchiveErr := archiver.ByExtension(coords.RealPath) format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
if unarchiver != nil && notArchiveErr == nil && !hasPackageReference(coords) { if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
} }
} }

View File

@ -1,11 +1,13 @@
package model package model
import ( import (
"context"
"fmt" "fmt"
"strings" "strings"
"time" "time"
"github.com/anchore/archiver/v3" "github.com/mholt/archives"
"github.com/anchore/packageurl-go" "github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -153,8 +155,8 @@ func trimRelative(s string) string {
// isArchive returns true if the path appears to be an archive // isArchive returns true if the path appears to be an archive
func isArchive(path string) bool { func isArchive(path string) bool {
_, err := archiver.ByExtension(path) format, _, err := archives.Identify(context.Background(), path, nil)
return err == nil return err == nil && format != nil
} }
func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {

View File

@ -80,7 +80,7 @@ func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ f
// processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives // processJavaArchive processes an archive for java contents, returning all Java libraries and nested archives
func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func (gap genericArchiveParserAdapter) processJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg) parser, cleanupFn, err := newJavaArchiveParser(ctx, reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions // note: even on error, we should always run cleanup functions
defer cleanupFn() defer cleanupFn()
if err != nil { if err != nil {
@ -99,7 +99,7 @@ func uniquePkgKey(groupID string, p *pkg.Package) string {
// newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover
// and parse nested archives or ignore them. // and parse nested archives or ignore them.
func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) {
// fetch the last element of the virtual path // fetch the last element of the virtual path
virtualElements := strings.Split(reader.Path(), ":") virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1] currentFilepath := virtualElements[len(virtualElements)-1]
@ -109,7 +109,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
} }
fileManifest, err := intFile.NewZipFileManifest(archivePath) fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil { if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err)
} }
@ -226,7 +226,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
} }
// fetch the manifest file // fetch the manifest file
contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...) contents, err := intFile.ContentsFromZip(ctx, j.archivePath, manifestMatches...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err) return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err)
} }
@ -387,8 +387,9 @@ type parsedPomProject struct {
// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information // discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) { func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (group, name, version string, parsedPom *parsedPomProject) {
properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) // Find the pom.properties/pom.xml if the names seem like a plausible match
projects, _ := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) properties, _ := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
projects, _ := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
artifactsMap := j.buildArtifactsMap(properties) artifactsMap := j.buildArtifactsMap(properties)
pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap) pomProperties, parsedPom := j.findBestPomMatch(properties, projects, artifactsMap)
@ -519,13 +520,13 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, paren
var pkgs []pkg.Package var pkgs []pkg.Package
// pom.properties // pom.properties
properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) properties, err := pomPropertiesByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
// pom.xml // pom.xml
projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) projects, err := pomProjectByParentPath(ctx, j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -575,7 +576,7 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
} }
if len(licenseMatches) > 0 { if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...) contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
} }
@ -616,7 +617,7 @@ func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, pare
// associating each discovered package to the given parent package. // associating each discovered package to the given parent package.
func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
// search and parse pom.properties files & fetch the contents // search and parse pom.properties files & fetch the contents
openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) openers, err := intFile.ExtractFromZipToUniqueTempFile(ctx, archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err)
} }
@ -680,8 +681,8 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
return nestedPkgs, nestedRelationships, nil return nestedPkgs, nestedRelationships, nil
} }
func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { func pomPropertiesByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) {
contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
} }
@ -709,8 +710,8 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
return propertiesByParentPath, nil return propertiesByParentPath, nil
} }
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { func pomProjectByParentPath(ctx context.Context, archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(ctx, archivePath, extractPaths...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err) return nil, fmt.Errorf("unable to extract maven files: %w", err)
} }

View File

@ -72,7 +72,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
// setup parser // setup parser
ap, cleanupFn, err := newJavaArchiveParser( ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{ file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,
@ -372,7 +372,7 @@ func TestParseJar(t *testing.T) {
UseNetwork: false, UseNetwork: false,
UseMavenLocalRepository: false, UseMavenLocalRepository: false,
} }
parser, cleanupFn, err := newJavaArchiveParser( parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{ file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,
@ -1499,7 +1499,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture, err := os.Open(fixturePath) fixture, err := os.Open(fixturePath)
require.NoError(t, err) require.NoError(t, err)
parser, cleanupFn, err := newJavaArchiveParser( parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{ file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,
@ -1636,7 +1636,7 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
ctx := context.TODO() ctx := context.TODO()
// setup parser // setup parser
ap, cleanupFn, err := newJavaArchiveParser( ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
file.LocationReadCloser{ file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()), Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture, ReadCloser: fixture,

View File

@ -70,7 +70,7 @@ func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx con
} }
func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...) openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(ctx, archivePath, contentPath, archiveFormatGlobs...)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err) return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
} }

View File

@ -41,7 +41,7 @@ func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx con
// functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central // functions support zips with shell scripts prepended to the file. Specifically, the helpers use the central
// header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib // header at the end of the file to determine where the beginning of the zip payload is (unlike the standard lib
// or archiver). // or archiver).
fileManifest, err := intFile.NewZipFileManifest(archivePath) fileManifest, err := intFile.NewZipFileManifest(ctx, archivePath)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err) return nil, nil, fmt.Errorf("unable to read files from java archive: %w", err)
} }

View File

@ -4,13 +4,15 @@ import (
"context" "context"
"crypto" "crypto"
"fmt" "fmt"
"io"
"os" "os"
"path" "path"
"path/filepath"
"sync" "sync"
"github.com/mholt/archives"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
"github.com/anchore/archiver/v3"
stereoFile "github.com/anchore/stereoscope/pkg/file" stereoFile "github.com/anchore/stereoscope/pkg/file"
intFile "github.com/anchore/syft/internal/file" intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
@ -208,18 +210,8 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
// unarchived. // unarchived.
envelopedUnarchiver, err := archiver.ByExtension(path) envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
// when tar/zip files are extracted, if there are multiple entries at the same
// location, the last entry wins
// NOTE: this currently does not display any messages if an overwrite happens
switch v := unarchiver.(type) {
case *archiver.Tar:
v.OverwriteExisting = true
case *archiver.Zip:
v.OverwriteExisting = true
}
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
@ -246,15 +238,58 @@ func digestOfFileContents(path string) string {
return di.String() return di.String()
} }
func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) { func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
var cleanupFn = func() error { return nil }
archive, err := os.Open(path)
if err != nil {
return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
}
defer archive.Close()
tempDir, err := os.MkdirTemp("", "syft-archive-contents-") tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
if err != nil { if err != nil {
return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err) return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
} }
cleanupFn := func() error { visitor := func(_ context.Context, file archives.FileInfo) error {
// Protect against symlink attacks by ensuring path doesn't escape tempDir
destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
if err != nil {
return err
}
if file.IsDir() {
return os.MkdirAll(destPath, file.Mode())
}
if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
return fmt.Errorf("failed to create parent directory: %w", err)
}
rc, err := file.Open()
if err != nil {
return fmt.Errorf("failed to open file in archive: %w", err)
}
defer rc.Close()
destFile, err := os.Create(destPath)
if err != nil {
return fmt.Errorf("failed to create file in destination: %w", err)
}
defer destFile.Close()
if err := destFile.Chmod(file.Mode()); err != nil {
return fmt.Errorf("failed to change mode of destination file: %w", err)
}
if _, err := io.Copy(destFile, rc); err != nil {
return fmt.Errorf("failed to copy file contents: %w", err)
}
return nil
}
return tempDir, func() error {
return os.RemoveAll(tempDir) return os.RemoveAll(tempDir)
} }, unarchiver.Extract(context.Background(), archive, visitor)
return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
} }