Merge pull request #428 from anchore/self-extracting-jar

Add support for processing files with prepended bytes before the zip archive
This commit is contained in:
Alex Goodman 2021-06-04 12:50:22 -04:00 committed by GitHub
commit 801e662633
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 364 additions and 122 deletions

View File

@ -1,7 +1,6 @@
package file
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
@ -9,6 +8,8 @@ import (
"path/filepath"
"syscall"
"testing"
"github.com/stretchr/testify/assert"
)
var expectedZipArchiveEntries = []string{
@ -18,35 +19,21 @@ var expectedZipArchiveEntries = []string{
"nested.zip",
}
// fatalIfError runs fn and aborts the test via t.Fatal when fn reports an error. A nil fn is ignored.
func fatalIfError(t *testing.T, fn func() error) {
	t.Helper()

	if fn != nil {
		if failure := fn(); failure != nil {
			t.Fatal(failure)
		}
	}
}
// createZipArchive creates a new ZIP archive file at destinationArchivePath based on the directory found at
// sourceDirPath.
func createZipArchive(t *testing.T, sourceDirPath, destinationArchivePath string) error {
func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string) {
t.Helper()
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("unable to get cwd: %+v", err)
t.Fatalf("unable to get cwd: %+v", err)
}
cmd := exec.Command("./generate-zip-fixture.sh", destinationArchivePath, path.Base(sourceDirPath))
cmd := exec.Command("./generate-zip-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath))
cmd.Dir = filepath.Join(cwd, "test-fixtures")
if err := cmd.Start(); err != nil {
return fmt.Errorf("unable to start generate zip fixture script: %+v", err)
t.Fatalf("unable to start generate zip fixture script: %+v", err)
}
if err := cmd.Wait(); err != nil {
@ -59,61 +46,62 @@ func createZipArchive(t *testing.T, sourceDirPath, destinationArchivePath string
// an ExitStatus() method with the same signature.
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
if status.ExitStatus() != 0 {
return fmt.Errorf("failed to generate fixture: rc=%d", status.ExitStatus())
t.Fatalf("failed to generate fixture: rc=%d", status.ExitStatus())
}
}
} else {
return fmt.Errorf("unable to get generate fixture script result: %+v", err)
t.Fatalf("unable to get generate fixture script result: %+v", err)
}
}
return nil
}
// assertNoError adapts fn into a no-argument function (the shape t.Cleanup accepts) that invokes fn and
// asserts that it returned no error.
func assertNoError(t testing.TB, fn func() error) func() {
	check := func() {
		err := fn()
		assert.NoError(t, err)
	}
	return check
}
// setupZipFileTest encapsulates common test setup work for zip file tests. It creates a zip archive from the
// directory at sourceDirPath and returns the path of the created archive. Removal of the temporary files is
// registered with t.Cleanup, and any setup failure triggers a fatal test failure, so the caller has neither a
// cleanup function to invoke nor an error to check.
func setupZipFileTest(t *testing.T, sourceDirPath string) (func() error, string, error) {
func setupZipFileTest(t testing.TB, sourceDirPath string) string {
t.Helper()
// Keep track of any needed cleanup work as we go
var cleanupFns []func() error
cleanup := func(fns []func() error) func() error {
return func() error {
for _, fn := range fns {
err := fn()
if err != nil {
return err
}
}
return nil
}
}
archivePrefix, err := ioutil.TempFile("", "syft-ziputil-archive-TEST-")
if err != nil {
return cleanup(cleanupFns), "", fmt.Errorf("unable to create tempfile: %+v", err)
t.Fatalf("unable to create tempfile: %+v", err)
}
cleanupFns = append(cleanupFns, func() error { return os.Remove(archivePrefix.Name()) })
t.Cleanup(
assertNoError(t,
func() error {
return os.Remove(archivePrefix.Name())
},
),
)
destinationArchiveFilePath := archivePrefix.Name() + ".zip"
t.Logf("archive path: %s", destinationArchiveFilePath)
err = createZipArchive(t, sourceDirPath, destinationArchiveFilePath)
cleanupFns = append(cleanupFns, func() error { return os.Remove(destinationArchiveFilePath) })
if err != nil {
return cleanup(cleanupFns), "", err
}
createZipArchive(t, sourceDirPath, destinationArchiveFilePath)
t.Cleanup(
assertNoError(t,
func() error {
return os.Remove(destinationArchiveFilePath)
},
),
)
cwd, err := os.Getwd()
if err != nil {
return cleanup(cleanupFns), "", fmt.Errorf("unable to get cwd: %+v", err)
t.Fatalf("unable to get cwd: %+v", err)
}
t.Logf("running from: %s", cwd)
return cleanup(cleanupFns), destinationArchiveFilePath, nil
return destinationArchiveFilePath
}
// TODO: Consider moving any non-git asset generation to a task (e.g. make) that's run ahead of running go tests.
@ -121,11 +109,7 @@ func ensureNestedZipExists(t *testing.T, sourceDirPath string) error {
t.Helper()
nestedArchiveFilePath := path.Join(sourceDirPath, "nested.zip")
err := createZipArchive(t, sourceDirPath, nestedArchiveFilePath)
if err != nil {
return fmt.Errorf("unable to create nested archive for test fixture: %+v", err)
}
createZipArchive(t, sourceDirPath, nestedArchiveFilePath)
return nil
}

View File

@ -1,23 +1,36 @@
package file
import (
"archive/zip"
"fmt"
"os"
"sort"
"strings"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
)
// ZipFileManifest is a collection of paths and their file metadata.
type ZipFileManifest map[string]os.FileInfo
// newZipManifest creates an empty ZipFileManifest.
func newZipManifest() ZipFileManifest {
return make(ZipFileManifest)
// NewZipFileManifest opens the zip archive at archivePath and returns a ZipFileManifest holding the name and file
// metadata of every entry in the archive. When the archive cannot be opened an empty manifest is returned
// together with the wrapped error.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
	result := make(ZipFileManifest)

	archive, err := OpenZip(archivePath)
	if err != nil {
		return result, fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
	}
	// a failed close is only logged: by the time it runs every entry has already been recorded
	defer func() {
		if closeErr := archive.Close(); closeErr != nil {
			log.Warnf("unable to close zip archive (%s): %+v", archivePath, closeErr)
		}
	}()

	for _, entry := range archive.Reader.File {
		result.Add(entry.Name, entry.FileInfo())
	}

	return result, nil
}
// Add a new path and its file metadata to the collection.
@ -47,26 +60,6 @@ func (z ZipFileManifest) GlobMatch(patterns ...string) []string {
return results
}
// NewZipFileManifest opens the zip archive at archivePath with zip.OpenReader and returns a ZipFileManifest
// holding the name and file metadata of every entry in the archive. When the archive cannot be opened an empty
// manifest is returned together with the wrapped error.
func NewZipFileManifest(archivePath string) (ZipFileManifest, error) {
	archive, err := zip.OpenReader(archivePath)
	result := newZipManifest()
	if err != nil {
		return result, fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
	}
	// a failed close is only logged: by the time it runs every entry has already been recorded
	defer func() {
		if closeErr := archive.Close(); closeErr != nil {
			log.Errorf("unable to close zip archive (%s): %+v", archivePath, closeErr)
		}
	}()

	for _, entry := range archive.Reader.File {
		result.Add(entry.Name, entry.FileInfo())
	}

	return result, nil
}
// normalizeZipEntryName takes the given path entry and ensures it is prefixed with "/".
func normalizeZipEntryName(entry string) string {
if !strings.HasPrefix(entry, "/") {

View File

@ -19,11 +19,7 @@ func TestNewZipFileManifest(t *testing.T) {
t.Fatal(err)
}
cleanup, archiveFilePath, err := setupZipFileTest(t, sourceDirPath)
defer fatalIfError(t, cleanup)
if err != nil {
t.Fatal(err)
}
archiveFilePath := setupZipFileTest(t, sourceDirPath)
actual, err := NewZipFileManifest(archiveFilePath)
if err != nil {
@ -63,12 +59,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
t.Fatal(err)
}
cleanup, archiveFilePath, err := setupZipFileTest(t, sourceDirPath)
//goland:noinspection GoNilness
defer fatalIfError(t, cleanup)
if err != nil {
t.Fatal(err)
}
archiveFilePath := setupZipFileTest(t, sourceDirPath)
z, err := NewZipFileManifest(archiveFilePath)
if err != nil {

View File

@ -47,7 +47,7 @@ func newZipTraverseRequest(paths ...string) zipTraversalRequest {
func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
request := newZipTraverseRequest(paths...)
zipReader, err := zip.OpenReader(archivePath)
zipReader, err := OpenZip(archivePath)
if err != nil {
return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
}

View File

@ -44,14 +44,12 @@ func TestUnzipToDir(t *testing.T) {
goldenRootDir := filepath.Join(cwd, "test-fixtures")
sourceDirPath := path.Join(goldenRootDir, "zip-source")
cleanup, archiveFilePath, err := setupZipFileTest(t, sourceDirPath)
defer fatalIfError(t, cleanup)
if err != nil {
t.Fatal(err)
}
archiveFilePath := setupZipFileTest(t, sourceDirPath)
unzipDestinationDir, err := ioutil.TempDir("", "syft-ziputil-contents-TEST-")
defer os.RemoveAll(unzipDestinationDir)
t.Cleanup(assertNoError(t, func() error {
return os.RemoveAll(unzipDestinationDir)
}))
if err != nil {
t.Fatalf("unable to create tempdir: %+v", err)
}
@ -127,41 +125,119 @@ func TestUnzipToDir(t *testing.T) {
}
func TestContentsFromZip(t *testing.T) {
archivePrefix, err := ioutil.TempFile("", "syft-ziputil-archive-TEST-")
tests := []struct {
name string
archivePrep func(tb testing.TB) string
}{
{
name: "standard, non-nested zip",
archivePrep: prepZipSourceFixture,
},
{
name: "zip with prepended bytes",
archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
archivePath := test.archivePrep(t)
expected := zipSourceFixtureExpectedContents()
var paths []string
for p := range expected {
paths = append(paths, p)
}
actual, err := ContentsFromZip(archivePath, paths...)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
}
assertZipSourceFixtureContents(t, actual, expected)
})
}
}
// prependZipSourceFixtureWithString returns an archive-preparation function that builds the zip-source fixture
// (via prepZipSourceFixture) and then rewrites it in place so that value precedes the zip data. The returned
// function yields the path of the rewritten archive. The calling test fails immediately if value is empty.
func prependZipSourceFixtureWithString(tb testing.TB, value string) func(tb testing.TB) string {
	if len(value) == 0 {
		tb.Fatalf("no bytes given to prefix")
	}
	return func(t testing.TB) string {
		t.Helper()

		archivePath := prepZipSourceFixture(t)

		// create a temp file
		tmpFile, err := ioutil.TempFile("", "syft-ziputil-prependZipSourceFixtureWithString-")
		if err != nil {
			t.Fatalf("unable to create tempfile: %+v", err)
		}
		// Safety nets for the failure paths below. On success the file has already been closed and renamed, so
		// both calls fail harmlessly and their errors are intentionally ignored.
		defer tmpFile.Close()
		t.Cleanup(func() {
			_ = os.Remove(tmpFile.Name())
		})

		// write value to the temp file
		if _, err := tmpFile.WriteString(value); err != nil {
			t.Fatalf("unable to write to tempfile: %+v", err)
		}

		// open the original archive
		sourceFile, err := os.Open(archivePath)
		if err != nil {
			t.Fatalf("unable to read source file: %+v", err)
		}
		// safety net: t.Fatalf exits the goroutine, which would otherwise leak this handle
		defer sourceFile.Close()

		// copy all contents from the archive to the temp file
		if _, err := io.Copy(tmpFile, sourceFile); err != nil {
			t.Fatalf("unable to copy source to dest: %+v", err)
		}

		// release both handles before touching the files on disk: a failed close can hide a failed write, and
		// some platforms refuse to remove or rename a file that is still open
		if err := sourceFile.Close(); err != nil {
			t.Fatalf("unable to close source file: %+v", err)
		}
		if err := tmpFile.Close(); err != nil {
			t.Fatalf("unable to close tempfile: %+v", err)
		}

		// remove the original archive and replace it with the temp file
		if err := os.Remove(archivePath); err != nil {
			t.Fatalf("unable to remove original source archive (%q): %+v", archivePath, err)
		}

		if err := os.Rename(tmpFile.Name(), archivePath); err != nil {
			t.Fatalf("unable to move new archive to old path (%q): %+v", tmpFile.Name(), err)
		}

		return archivePath
	}
}
func prepZipSourceFixture(t testing.TB) string {
t.Helper()
archivePrefix, err := ioutil.TempFile("", "syft-ziputil-prepZipSourceFixture-")
if err != nil {
t.Fatalf("unable to create tempfile: %+v", err)
}
defer os.Remove(archivePrefix.Name())
t.Cleanup(func() {
assert.NoError(t, os.Remove(archivePrefix.Name()))
})
// the zip utility will add ".zip" to the end of the given name
archivePath := archivePrefix.Name() + ".zip"
defer os.Remove(archivePath)
t.Cleanup(func() {
assert.NoError(t, os.Remove(archivePath))
})
t.Logf("archive path: %s", archivePath)
err = createZipArchive(t, "zip-source", archivePrefix.Name())
if err != nil {
t.Fatal(err)
}
cwd, err := os.Getwd()
if err != nil {
t.Errorf("unable to get cwd: %+v", err)
}
t.Logf("running from: %s", cwd)
aFilePath := filepath.Join("some-dir", "a-file.txt")
bFilePath := filepath.Join("b-file.txt")
expected := map[string]string{
aFilePath: "A file! nice!",
bFilePath: "B file...",
}
actual, err := ContentsFromZip(archivePath, aFilePath, bFilePath)
if err != nil {
t.Fatalf("unable to extract from unzip archive: %+v", err)
createZipArchive(t, "zip-source", archivePrefix.Name())
return archivePath
}
// zipSourceFixtureExpectedContents returns the file contents expected from the zip-source fixture, keyed by
// the path of each file.
func zipSourceFixtureExpectedContents() map[string]string {
	contents := make(map[string]string, 2)
	contents[filepath.Join("some-dir", "a-file.txt")] = "A file! nice!"
	contents[filepath.Join("b-file.txt")] = "B file..."
	return contents
}
func assertZipSourceFixtureContents(t testing.TB, actual map[string]string, expected map[string]string) {
t.Helper()
diffs := deep.Equal(actual, expected)
if len(diffs) > 0 {
for _, d := range diffs {

View File

@ -0,0 +1,149 @@
package file
import (
"archive/zip"
"encoding/binary"
"fmt"
"io"
"os"
)
// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
// - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
const directoryEndLen = 22
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
type ZipReadCloser struct {
// Reader gives access to the archive entries; OpenZip builds it over the section of the file that starts at
// the detected beginning of the zip data.
*zip.Reader
// Closer releases the underlying resource; OpenZip sets it to the opened archive file.
io.Closer
}
// OpenZip provides a ZipReadCloser for the given filepath. Bytes that precede the zip data (such as the launcher
// script of a self-executing JAR) are skipped, so the returned reader only sees the archive itself.
//
// The caller owns the returned ZipReadCloser and must Close it to release the underlying file. When an error is
// returned the file has already been closed and the returned reader is nil.
func OpenZip(filepath string) (*ZipReadCloser, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}

	// On every failure below the file is closed before returning; the close error is dropped because the
	// original failure is the one worth reporting.
	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, err
	}

	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
	// need to find the start of the archive and keep track of this offset.
	offset, err := findArchiveStartOffset(f, fi.Size())
	if err != nil {
		f.Close()
		return nil, fmt.Errorf("cannot find beginning of zip archive=%q : %w", filepath, err)
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		f.Close()
		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
	}

	// expose only the bytes from the start of the zip data to the end of the file
	size := fi.Size() - int64(offset)

	r, err := zip.NewReader(io.NewSectionReader(f, int64(offset), size), size)
	if err != nil {
		f.Close()
		return nil, fmt.Errorf("unable to open ZipReadCloser @ %q: %w", filepath, err)
	}

	return &ZipReadCloser{
		Reader: r,
		Closer: f,
	}, nil
}
// readBuf is a byte slice consumed from the front: each read decodes a little-endian value from the first bytes
// and then advances the slice past them.
type readBuf []byte

// uint16 decodes the leading two bytes as a little-endian value and drops them from the buffer.
func (b *readBuf) uint16() uint16 {
	data := *b
	value := binary.LittleEndian.Uint16(data)
	*b = data[2:]
	return value
}

// uint32 decodes the leading four bytes as a little-endian value and drops them from the buffer.
func (b *readBuf) uint32() uint32 {
	data := *b
	value := binary.LittleEndian.Uint32(data)
	*b = data[4:]
	return value
}
// directoryEnd holds the values that findArchiveStartOffset decodes from the zip directory-end record. Every
// field is widened from the 16- or 32-bit value read out of the record.
type directoryEnd struct {
diskNbr uint32 // unused
dirDiskNbr uint32 // unused
dirRecordsThisDisk uint64 // unused
// directoryRecords is only compared against 0xffff as a hint that the file may be zip64.
directoryRecords uint64
// directorySize is subtracted, together with directoryOffset, from the record position to locate the start
// of the zip data.
directorySize uint64
directoryOffset uint64 // relative to file
}
// findArchiveStartOffset returns the offset within r at which the zip data begins, which is non-zero when
// arbitrary bytes have been prepended to the archive. size is the total number of readable bytes in r.
//
// The directory-end record is located by scanning the tail of the file. The start of the archive is then the
// position of that record minus the directory size and directory offset it declares. zip.ErrFormat is returned
// when no record is found, when the declared directory offset lies outside the file, or when the record declares
// more directory bytes than precede it. A file that may be zip64 is reported as starting at offset 0, since
// prefixed data is not supported for it.
//
// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
	// look for directoryEndSignature in the last 1k, then in the last 65k
	var buf []byte
	var directoryEndOffset int64
	for i, bLen := range []int64{1024, 65 * 1024} {
		if bLen > size {
			bLen = size
		}
		buf = make([]byte, int(bLen))
		if _, readErr := r.ReadAt(buf, size-bLen); readErr != nil && readErr != io.EOF {
			return 0, readErr
		}
		if p := findSignatureInBlock(buf); p >= 0 {
			buf = buf[p:]
			directoryEndOffset = size - bLen + int64(p)
			break
		}
		// give up once the widest window (or the whole file) has been searched
		if i == 1 || bLen == size {
			return 0, zip.ErrFormat
		}
	}

	// The loop only exits normally through the break above, so buf starts at the signature and holds at least a
	// full directory-end record.

	// read header into struct
	b := readBuf(buf[4:]) // skip signature
	d := &directoryEnd{
		diskNbr:            uint32(b.uint16()),
		dirDiskNbr:         uint32(b.uint16()),
		dirRecordsThisDisk: uint64(b.uint16()),
		directoryRecords:   uint64(b.uint16()),
		directorySize:      uint64(b.uint32()),
		directoryOffset:    uint64(b.uint32()),
	}

	// Make sure directoryOffset points to somewhere in our file.
	if o := int64(d.directoryOffset); o < 0 || o >= size {
		return 0, zip.ErrFormat
	}

	// These values mean that the file can be a zip64 file
	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
		return 0, nil // Prefixed data not supported
	}

	// The directory must fit in front of the directory-end record. Without this check the unsigned subtraction
	// below would wrap around and report a bogus, enormous offset for a malformed record.
	directoryBytes := d.directorySize + d.directoryOffset
	if directoryBytes > uint64(directoryEndOffset) {
		return 0, zip.ErrFormat
	}

	// Calculate where the zip data actually begins
	return uint64(directoryEndOffset) - directoryBytes, nil
}
// findSignatureInBlock scans b from the end towards the start for a directory-end signature ("PK" 0x05 0x06)
// whose record, including its trailing comment, fits inside b. It returns the index of the signature, or -1 when
// there is no such record.
func findSignatureInBlock(b []byte) int {
	for pos := len(b) - directoryEndLen; pos >= 0; pos-- {
		// defined from directoryEndSignature
		if b[pos] != 'P' || b[pos+1] != 'K' || b[pos+2] != 0x05 || b[pos+3] != 0x06 {
			continue
		}

		// the last two bytes of the fixed-size record hold the little-endian comment length
		lo, hi := b[pos+directoryEndLen-2], b[pos+directoryEndLen-1]
		commentLen := int(lo) | int(hi)<<8

		if pos+directoryEndLen+commentLen <= len(b) {
			return pos
		}
	}
	return -1
}

View File

@ -0,0 +1,47 @@
package file
import (
"os"
"testing"
"github.com/stretchr/testify/assert"
)
// TestFindArchiveStartOffset verifies that findArchiveStartOffset reports offset 0 for a plain zip archive and
// the length of the prepended data for an archive that has bytes in front of the zip content.
func TestFindArchiveStartOffset(t *testing.T) {
	tests := []struct {
		name        string
		archivePrep func(tb testing.TB) string
		expected    uint64
	}{
		{
			name:        "standard, non-nested zip",
			archivePrep: prepZipSourceFixture,
			expected:    0,
		},
		{
			name:        "zip with prepended bytes",
			archivePrep: prependZipSourceFixtureWithString(t, "junk at the beginning of the file..."),
			// the length of the prepended string
			expected: 36,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			archivePath := test.archivePrep(t)
			f, err := os.Open(archivePath)
			if err != nil {
				t.Fatalf("could not open archive %q: %+v", archivePath, err)
			}
			// Cleanups run last-registered-first, so the handle is closed before the fixture's own cleanup
			// removes the archive.
			t.Cleanup(assertNoError(t, f.Close))

			// stat through the open handle so the size describes the file actually being read
			fi, err := f.Stat()
			if err != nil {
				t.Fatalf("unable to stat archive: %+v", err)
			}

			actual, err := findArchiveStartOffset(f, fi.Size())
			if err != nil {
				t.Fatalf("unable to find offset: %+v", err)
			}
			assert.Equal(t, test.expected, actual)
		})
	}
}

View File

@ -52,10 +52,12 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>1.0.2.RELEASE</version>
<version>2.1.5.RELEASE</version>
<configuration>
<mainClass>${start-class}</mainClass>
<layout>ZIP</layout>
<!-- this gives us a specific build example with a self-executing jar (jar with a shell script prefixed to the archive) -->
<executable>true</executable>
</configuration>
<executions>
<execution>