update zip_read_closer to incorporate zip64 support (#1041)

This commit is contained in:
Christopher Angelo Phillips 2022-06-16 10:43:18 -04:00 committed by GitHub
parent e72d68b0c6
commit 9e72771b85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 130 additions and 13 deletions

View File

@ -3,5 +3,10 @@ set -eux
# $1 —— absolute path to destination file, should end with .zip, ideally
# $2 —— absolute path to directory from which to add entries to the archive
# $3 —— "1" to force the zip64 format; "0" (or omitted) for a regular zip archive

# Compare against "1" explicitly. The earlier test, `[[$3]]`, had no spaces
# around `[[`, so bash treated it as a command named `[[0]]`/`[[1]]`, the
# lookup failed, and the zip64 branch could never run. "${3:-0}" keeps
# two-argument invocations working under `set -u`.
if [[ "${3:-0}" == "1" ]]; then
  # -fz makes zip write zip64 records even when the archive is small
  pushd "$2" && find . -print | zip -fz "$1" -@ && popd
else
  pushd "$2" && find . -print | zip "$1" -@ && popd
fi

View File

@ -20,16 +20,20 @@ var expectedZipArchiveEntries = []string{
}
// createZipArchive creates a new ZIP archive file at destinationArchivePath based on the directory found at
// sourceDirPath.
func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string) {
// sourceDirPath. It forces a zip64 archive if zip64 is true.
func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string, zip64 bool) {
t.Helper()
cwd, err := os.Getwd()
if err != nil {
t.Fatalf("unable to get cwd: %+v", err)
}
zip64Arg := "0"
if zip64 {
zip64Arg = "1"
}
cmd := exec.Command("./generate-zip-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath))
cmd := exec.Command("./generate-zip-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath), zip64Arg)
cmd.Dir = filepath.Join(cwd, "test-fixtures")
if err := cmd.Start(); err != nil {
@ -66,7 +70,7 @@ func assertNoError(t testing.TB, fn func() error) func() {
// which should be called (typically deferred) by the caller, the path of the created zip archive, and an error,
// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil
// (even if there's an error), and it should always be called.
func setupZipFileTest(t testing.TB, sourceDirPath string) string {
func setupZipFileTest(t testing.TB, sourceDirPath string, zip64 bool) string {
t.Helper()
archivePrefix, err := ioutil.TempFile("", "syft-ziputil-archive-TEST-")
@ -84,7 +88,7 @@ func setupZipFileTest(t testing.TB, sourceDirPath string) string {
destinationArchiveFilePath := archivePrefix.Name() + ".zip"
t.Logf("archive path: %s", destinationArchiveFilePath)
createZipArchive(t, sourceDirPath, destinationArchiveFilePath)
createZipArchive(t, sourceDirPath, destinationArchiveFilePath, zip64)
t.Cleanup(
assertNoError(t,
@ -109,7 +113,7 @@ func ensureNestedZipExists(t *testing.T, sourceDirPath string) error {
t.Helper()
nestedArchiveFilePath := path.Join(sourceDirPath, "nested.zip")
createZipArchive(t, sourceDirPath, nestedArchiveFilePath)
createZipArchive(t, sourceDirPath, nestedArchiveFilePath, false)
return nil
}

View File

@ -22,7 +22,42 @@ func TestNewZipFileManifest(t *testing.T) {
t.Fatal(err)
}
archiveFilePath := setupZipFileTest(t, sourceDirPath)
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
actual, err := NewZipFileManifest(archiveFilePath)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}
if len(expectedZipArchiveEntries) != len(actual) {
t.Fatalf("mismatched manifest: %d != %d", len(actual), len(expectedZipArchiveEntries))
}
for _, e := range expectedZipArchiveEntries {
_, ok := actual[e]
if !ok {
t.Errorf("missing path: %s", e)
}
}
if t.Failed() {
b, err := json.MarshalIndent(actual, "", " ")
if err != nil {
t.Fatalf("can't show results: %+v", err)
}
t.Errorf("full result: %s", string(b))
}
}
func TestNewZip64FileManifest(t *testing.T) {
cwd, err := os.Getwd()
if err != nil {
t.Fatal(err)
}
sourceDirPath := path.Join(cwd, "test-fixtures", "zip-source")
archiveFilePath := setupZipFileTest(t, sourceDirPath, true)
actual, err := NewZipFileManifest(archiveFilePath)
if err != nil {
@ -62,7 +97,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
t.Fatal(err)
}
archiveFilePath := setupZipFileTest(t, sourceDirPath)
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
z, err := NewZipFileManifest(archiveFilePath)
if err != nil {

View File

@ -47,7 +47,7 @@ func TestUnzipToDir(t *testing.T) {
goldenRootDir := filepath.Join(cwd, "test-fixtures")
sourceDirPath := path.Join(goldenRootDir, "zip-source")
archiveFilePath := setupZipFileTest(t, sourceDirPath)
archiveFilePath := setupZipFileTest(t, sourceDirPath, false)
unzipDestinationDir, err := ioutil.TempDir("", "syft-ziputil-contents-TEST-")
t.Cleanup(assertNoError(t, func() error {
@ -227,7 +227,7 @@ func prepZipSourceFixture(t testing.TB) string {
t.Logf("archive path: %s", archivePath)
createZipArchive(t, "zip-source", archivePrefix.Name())
createZipArchive(t, "zip-source", archivePrefix.Name(), false)
return archivePath
}

View File

@ -3,6 +3,7 @@ package file
import (
"archive/zip"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
@ -13,7 +14,13 @@ import (
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
const directoryEndLen = 22
// Byte lengths of the fixed-size records read from the tail of a zip archive.
const (
	// directoryEndLen is how far from the end of a block findSignatureInBlock
	// starts scanning for the directory end record.
	directoryEndLen = 22
	// directory64LocLen is the number of bytes read for the zip64 locator,
	// which sits directly before the directory end record.
	directory64LocLen = 20
	// directory64EndLen is the number of bytes read for the zip64 directory
	// end record.
	directory64EndLen = 56
)

// Signatures expected at the start of the zip64 records.
const (
	// directory64LocSignature opens the zip64 locator.
	directory64LocSignature = 0x07064b50
	// directory64EndSignature opens the zip64 directory end record.
	directory64EndSignature = 0x06064b50
)
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
@ -72,6 +79,12 @@ func (b *readBuf) uint32() uint32 {
return v
}
// uint64 decodes the first eight bytes of the buffer as a little-endian
// unsigned integer and advances the buffer past them. The buffer must hold
// at least eight bytes.
func (b *readBuf) uint64() uint64 {
	buf := *b
	value := binary.LittleEndian.Uint64(buf)
	*b = buf[8:]
	return value
}
type directoryEnd struct {
diskNbr uint32 // unused
dirDiskNbr uint32 // unused
@ -82,6 +95,7 @@ type directoryEnd struct {
}
// note: this is derived from readDirectoryEnd within the archive/zip package
// nolint:gocognit
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var buf []byte
@ -120,13 +134,22 @@ func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, e
directoryOffset: uint64(b.uint32()),
}
// Calculate where the zip data actually begins
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
// These values mean that the file can be a zip64 file
if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
p, err := findDirectory64End(r, directoryEndOffset)
if err == nil && p >= 0 {
directoryEndOffset = p
err = readDirectory64End(r, p, d)
}
if err != nil {
return 0, err
}
startOfArchive = 0 // Prefixed data not supported
}
startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
// Make sure directoryOffset points to somewhere in our file.
if o := int64(d.directoryOffset); o < 0 || o >= size {
return 0, zip.ErrFormat
@ -134,6 +157,56 @@ func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, e
return startOfArchive, nil
}
// findDirectory64End inspects the directory64LocLen bytes that immediately
// precede directoryEndOffset for a zip64 locator. When a well-formed locator
// is present it returns the offset recorded there for the zip64 directory
// end. It returns -1 with a nil error when the locator would start before the
// beginning of the file or when its signature or disk fields do not match,
// and -1 with the read error when r cannot supply the bytes.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	locatorStart := directoryEndOffset - directory64LocLen
	if locatorStart < 0 {
		return -1, nil // no need to look for a header outside the file
	}

	raw := make([]byte, directory64LocLen)
	if _, err := r.ReadAt(raw, locatorStart); err != nil {
		return -1, err
	}

	// The locator has a fixed layout, so decode every field first and
	// validate afterwards.
	locator := readBuf(raw)
	signature := locator.uint32()
	startDisk := locator.uint32()  // number of the disk with the start of the zip64 end of central directory
	endOffset := locator.uint64()  // relative offset of the zip64 end of central directory record
	totalDisks := locator.uint32() // total number of disks

	if signature != directory64LocSignature || startDisk != 0 || totalDisks != 1 {
		return -1, nil // the file is not a valid zip64-file
	}
	return int64(endOffset), nil
}
// readDirectory64End reads directory64EndLen bytes from r at offset, checks
// that they start with directory64EndSignature, and overwrites the disk,
// record-count, size and offset fields of d with the values stored in that
// record. It returns the read error if r cannot supply the bytes, or a
// "could not read directory64End" error on a signature mismatch; in both
// cases d is left unmodified.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	// dir size (uint64) + version (uint16) + version needed (uint16)
	const skippedLen = 8 + 2 + 2

	raw := make([]byte, directory64EndLen)
	if _, readErr := r.ReadAt(raw, offset); readErr != nil {
		return readErr
	}

	record := readBuf(raw)
	if record.uint32() != directory64EndSignature {
		return errors.New("could not read directory64End")
	}
	record = record[skippedLen:]

	d.diskNbr = record.uint32()            // number of this disk
	d.dirDiskNbr = record.uint32()         // number of the disk with the start of the central directory
	d.dirRecordsThisDisk = record.uint64() // total number of entries in the central directory on this disk
	d.directoryRecords = record.uint64()   // total number of entries in the central directory
	d.directorySize = record.uint64()      // size of the central directory
	d.directoryOffset = record.uint64()    // offset of start of central directory with respect to the starting disk number
	return nil
}
func findSignatureInBlock(b []byte) int {
for i := len(b) - directoryEndLen; i >= 0; i-- {
// defined from directoryEndSignature