Decompress UPX-packed binaries to extract golang build info (ELF-formatted binaries, LZMA method only) (#4480)
* decompress upx packed binaries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix linting and remove dead code

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
parent 7ef4703454
commit 0ea920ba6d
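In brief, the change makes the Go binary cataloger fall back to in-memory UPX decompression when a direct build-info read fails. A condensed sketch of that flow (an editorial illustration of the logic in this diff, not the verbatim code; it assumes it sits in the same package as the unexported helpers added below):

package golang

import (
	"debug/buildinfo"
	"io"
)

// readGoBuildInfo sketches the fallback this commit adds to getBuildInfo:
// try a direct read first, and only for UPX-packed binaries decompress and retry.
func readGoBuildInfo(r io.ReaderAt) (*buildinfo.BuildInfo, error) {
	bi, err := buildinfo.Read(r) // works for ordinary Go binaries
	if err == nil {
		return bi, nil
	}
	if isUPXCompressed(r) { // "UPX!" magic found near the start of the file
		if decompressed, dErr := decompressUPX(r); dErr == nil { // LZMA-method ELF only
			if bi, rErr := buildinfo.Read(decompressed); rErr == nil {
				return bi, nil
			}
		}
	}
	return nil, err // fall through with the original error
}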
go.mod (2 changed lines)
@@ -239,7 +239,7 @@ require (
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
-	github.com/ulikunitz/xz v0.5.15 // indirect
+	github.com/ulikunitz/xz v0.5.15
 	github.com/vbatts/tar-split v0.12.2 // indirect
 	github.com/xanzy/ssh-agent v0.3.3 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
@@ -47,6 +47,37 @@ func Test_PackageCataloger_Binary(t *testing.T) {
 				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
 			},
 		},
+		{
+			name:    "upx compressed binary",
+			fixture: "image-small-upx",
+			expectedPkgs: []string{
+				"anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me)",
+			},
+			expectedRels: []string{
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+			},
+		},
 		{
 			name: "partially built binary",
 			// the difference is the build flags used to build the binary... they will not reference the module directly
@@ -32,7 +32,7 @@ func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extend
 
 	var builds []*extendedBuildInfo
 	for _, r := range readers {
-		bi, err := getBuildInfo(r)
+		bi, err := getBuildInfo(r, location)
 		if err != nil {
 			log.WithFields("file", location.RealPath, "error", err).Trace("unable to read golang buildinfo")
@@ -89,7 +89,7 @@ func getCryptoSettingsFromVersion(v version.Version) []string {
 	return cryptoSettings
 }
 
-func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
+func getBuildInfo(r io.ReaderAt, location file.Location) (bi *debug.BuildInfo, err error) {
 	defer func() {
 		if r := recover(); r != nil {
 			// this can happen in cases where a malformed binary is passed in that can be initially parsed, but not
@@ -98,7 +98,25 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 			err = fmt.Errorf("recovered from panic: %v", r)
 		}
 	}()
 
+	// try to read buildinfo from the binary directly
 	bi, err = buildinfo.Read(r)
+	if err == nil {
+		return bi, nil
+	}
+
+	// if direct read fails and this looks like a UPX-compressed binary,
+	// try to decompress and read the buildinfo from the decompressed data
+	if isUPXCompressed(r) {
+		log.WithFields("path", location.RealPath).Trace("detected UPX-compressed Go binary, attempting decompression to read the build info")
+		decompressed, decompErr := decompressUPX(r)
+		if decompErr == nil {
+			bi, err = buildinfo.Read(decompressed)
+			if err == nil {
+				return bi, nil
+			}
+		}
+	}
+
 	// note: the stdlib does not export the error we need to check for
 	if err != nil {
@@ -106,11 +124,11 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 			// since the cataloger can only select executables and not distinguish if they are a go-compiled
 			// binary, we should not show warnings/logs in this case. For this reason we nil-out err here.
 			err = nil
-			return
+			return bi, err
 		}
 		// in this case we could not read or parse the file, but not explicitly because it is not a
 		// go-compiled binary (though it still might be).
-		return
+		return bi, err
 	}
-	return
+	return bi, err
 }
@@ -8,6 +8,8 @@ import (
 
 	"github.com/kastenhq/goversion/version"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/syft/syft/file"
 )
 
 func Test_getBuildInfo(t *testing.T) {
@@ -31,7 +33,7 @@ func Test_getBuildInfo(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			gotBi, err := getBuildInfo(tt.args.r)
+			gotBi, err := getBuildInfo(tt.args.r, file.Location{})
 			if !tt.wantErr(t, err, fmt.Sprintf("getBuildInfo(%v)", tt.args.r)) {
 				return
 			}
syft/pkg/cataloger/golang/test-fixtures/image-small-upx/.gitignore (new file, 1 line, vendored)
@@ -0,0 +1 @@
+/run-me
@@ -0,0 +1,18 @@
+FROM --platform=linux/amd64 golang:1.23.2-alpine AS builder
+
+RUN apk add --no-cache upx
+
+RUN mkdir /app
+WORKDIR /app
+
+COPY go.mod go.sum ./
+RUN go mod download
+COPY main.go main.go
+
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-X main.Version=1.0.0" -o run-me .
+RUN upx --best --lzma --exact run-me
+
+FROM scratch
+
+COPY --from=builder /app/run-me /run-me
+ENTRYPOINT ["/run-me"]
@@ -0,0 +1,19 @@
+module anchore.io/not/real
+
+go 1.23
+
+toolchain go1.23.2
+
+require github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
+
+require (
+	github.com/andybalholm/brotli v1.1.1 // indirect
+	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
+	github.com/golang/snappy v0.0.4 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
+	github.com/klauspost/pgzip v1.2.6 // indirect
+	github.com/nwaples/rardecode v1.1.3 // indirect
+	github.com/pierrec/lz4/v4 v4.1.21 // indirect
+	github.com/ulikunitz/xz v0.5.12 // indirect
+	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
+)
@@ -0,0 +1,28 @@
+github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
+github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
+github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
+github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
+github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
+github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
+github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
+github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -0,0 +1,19 @@
+package main
+
+import "github.com/anchore/archiver/v3"
+
+func main() {
+
+	z := archiver.Zip{
+		MkdirAll:               true,
+		SelectiveCompression:   true,
+		ContinueOnError:        false,
+		OverwriteExisting:      false,
+		ImplicitTopLevelFolder: false,
+	}
+
+	err := z.Archive([]string{"main.go"}, "test.zip")
+	if err != nil {
+		panic(err)
+	}
+}
syft/pkg/cataloger/golang/upx.go (new file, 533 lines)
@@ -0,0 +1,533 @@
+package golang
+
+// UPX Decompression Support
+//
+// this file implements decompression of UPX-compressed ELF binaries to enable
+// extraction of Go build information (.go.buildinfo) from packed executables.
+//
+// UPX (Ultimate Packer for eXecutables) is a popular executable packer that
+// compresses binaries to reduce file size. When a Go binary is compressed with
+// UPX, the standard debug/buildinfo.Read() fails because the .go.buildinfo
+// section is compressed. This code decompresses the binary in-memory to allow
+// buildinfo extraction.
+//
+// # Supported Compression Methods
+//
+// Currently only LZMA (method 14) is supported, which is used by:
+//
+//	upx --best --lzma <binary>
+//
+// Other UPX methods (NRV2B, NRV2D, NRV2E, etc.) are not yet implemented but
+// could be added via the upxDecompressors dispatch map.
+//
+// # Key Functions
+//
+//   - isUPXCompressed: detects UPX magic bytes ("UPX!") in the binary
+//   - decompressUPX: main entry point; decompresses all blocks and reconstructs the ELF
+//   - decompressLZMA: handles UPX's custom 2-byte LZMA header format
+//   - unfilter49: reverses the CTO (call trick optimization) filter for x86/x64 code
+//   - parseELFPTLoadOffsets: extracts PT_LOAD segment offsets for proper block placement
+//
+// # UPX Binary Format
+//
+// UPX-compressed binaries contain several header structures followed by compressed blocks:
+//
+//	l_info (at "UPX!" magic):
+//	  - l_checksum (4 bytes before magic)
+//	  - l_magic "UPX!" (4 bytes)
+//	  - l_lsize (2 bytes) - loader size
+//	  - l_version (1 byte)
+//	  - l_format (1 byte)
+//
+//	p_info (12 bytes, follows l_info):
+//	  - p_progid (4 bytes)
+//	  - p_filesize (4 bytes) - original uncompressed file size
+//	  - p_blocksize (4 bytes)
+//
+//	b_info (12 bytes each, one per compressed block):
+//	  - sz_unc (4 bytes) - uncompressed size
+//	  - sz_cpr (4 bytes) - compressed size
+//	  - b_method (1 byte) - compression method (14 = LZMA)
+//	  - b_ftid (1 byte) - filter ID (0x49 = CTO filter)
+//	  - b_cto8 (1 byte) - filter parameter
+//	  - unused (1 byte)
+//
+// # LZMA Header Format
+//
+// UPX uses a 2-byte custom header, NOT the standard 13-byte LZMA format:
+//
+//	Byte 0: (t << 3) | pb, where t = lc + lp
+//	Byte 1: (lp << 4) | lc
+//	Byte 2+: raw LZMA stream
+//
+// This is converted to standard LZMA props: props = lc + lp*9 + pb*9*5
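+//
+// Worked example (editor's note, hypothetical header bytes): for the common
+// parameters lc=3, lp=0, pb=2, byte 0 = ((3+0)<<3)|2 = 0x1A and byte 1 =
+// (0<<4)|3 = 0x03, which decode back to props = 3 + 0*9 + 2*45 = 93 (0x5D),
+// the familiar standard-LZMA properties byte.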
+//
+// # ELF Segment Placement
+//
+// Decompressed blocks must be placed at specific file offsets according to the
+// ELF PT_LOAD segments parsed from the first decompressed block. Simply
+// concatenating blocks produces invalid output.
+//
+// # References
+//
+//   - UPX source: https://github.com/upx/upx
+//   - LZMA format: https://github.com/upx/upx/blob/devel/src/compress/compress_lzma.cpp
+//   - CTO filter: https://github.com/upx/upx/blob/master/src/filter/cto.h
+//
+// note: no code was copied from the UPX repo; this is an independent implementation based on the format description.
+//
+// # Anti-Unpacking / Obfuscation (Not Currently Supported)
+//
+// Malware commonly modifies UPX binaries to evade analysis. This implementation
+// does not currently handle obfuscated binaries, but these techniques could be
+// addressed in the future:
+//
+//   - Magic modification: "UPX!" replaced with custom bytes (e.g., "YTS!", "MOZI").
+//     Recovery: scan for decompression stub code patterns instead of magic bytes.
+//
+//   - Zeroed p_info fields: p_filesize and p_blocksize set to 0.
+//     Recovery: read original size from PackHeader at EOF (last 36 bytes, offset 0x18).
+//
+//   - Header corruption: checksums or version fields modified.
+//     Recovery: ignore validation and use PackHeader values as the authoritative source.
+//
+// This would require parsing the PackHeader, which is not parsed today. Located in the
+// final 36 bytes of the file, it contains metadata recoverable even if p_info is corrupted:
+//
+//	Offset  Size  Field            Description
+//	──────────────────────────────────────────────────────────
+//	0x00    4     magic            UPX magic "UPX!" (0x21585055)
+//	0x04    1     version          UPX version
+//	0x05    1     format           Executable format
+//	0x06    1     method           Compression method
+//	0x07    1     level            Compression level (1-10)
+//	0x08    4     u_adler          Uncompressed data checksum
+//	0x0C    4     c_adler          Compressed data checksum
+//	0x10    4     u_len            Uncompressed length
+//	0x14    4     c_len            Compressed length
+//	0x18    4     u_file_size      Original file size ← recovery point
+//	0x1C    1     filter           Filter ID
+//	0x1D    1     filter_cto       Filter CTO parameter
+//	0x1E    1     n_mru            MRU parameter
+//	0x1F    1     header_checksum  Header checksum
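+//
+// A recovery sketch (editor's illustration, not implemented here; it assumes the
+// PackHeader fields are little-endian like the rest of this file, and fileSize
+// is the total input length):
+//
+//	var tail [36]byte
+//	if _, err := r.ReadAt(tail[:], fileSize-36); err == nil && bytes.HasPrefix(tail[:], upxMagic) {
+//		originalSize := binary.LittleEndian.Uint32(tail[0x18 : 0x18+4])
+//		// use originalSize in place of a zeroed p_filesize
+//	}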
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/ulikunitz/xz/lzma"
+)
+
+// UPX compression method constants
+const (
+	upxMethodLZMA uint8 = 14 // M_LZMA in UPX source
+)
+
+// UPX filter constants
+const (
+	upxFilterCTO uint8 = 0x49 // CTO (call trick optimization) filter for x86/x64
+)
+
+var (
+	// upxMagic is the magic bytes that identify a UPX-packed binary
+	upxMagic = []byte("UPX!")
+
+	errNotUPX               = errors.New("not a UPX-compressed binary")
+	errUnsupportedUPXMethod = errors.New("unsupported UPX compression method")
+)
+
+// upxInfo contains parsed UPX header information
+type upxInfo struct {
+	magicOffset   int64
+	version       uint8
+	format        uint8
+	originalSize  uint32 // p_filesize - original uncompressed file size
+	blockSize     uint32 // p_blocksize - size of each compression block
+	firstBlockOff int64  // offset to first b_info structure
+}
+
+// blockInfo contains information about a single compressed block
+type blockInfo struct {
+	uncompressedSize uint32
+	compressedSize   uint32
+	method           uint8
+	filterID         uint8
+	filterCTO        uint8
+	dataOffset       int64
+}
+
+// upxDecompressor is a function that decompresses data using a specific method
+type upxDecompressor func(compressedData []byte, uncompressedSize uint32) ([]byte, error)
+
+// upxDecompressors maps compression methods to their decompressor functions
+var upxDecompressors = map[uint8]upxDecompressor{
+	upxMethodLZMA: decompressLZMA,
+
+	// note: the NRV algorithms are from the UCL library, an open-source implementation based on the NRV (Not Really Vanished) algorithm.
+	// TODO: future methods can be added here
+	// upxMethodNRV2B: decompressNRV2B,
+	// upxMethodNRV2D: decompressNRV2D,
+	// upxMethodNRV2E: decompressNRV2E,
+}
+
+// unfilter49 reverses UPX filter 0x49 (CTO / call trick optimization).
+// The filter transforms CALL (0xE8) and JMP (0xE9) instruction addresses in x86/x64 code to improve compression.
+// The filtered format stores addresses as big-endian with cto8 as the high byte marker (the `cto8` parameter,
+// stored in `b_cto8`, marks transformed instructions):
+//
+//	original: E8 xx xx xx xx (CALL rel32, little-endian offset)
+//	filtered: E8 CC yy yy yy (big-endian, CC = cto8 marker)
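+//
+// Worked example (editor's note, hypothetical bytes): with cto8 = 0x5A, the
+// filtered sequence E8 5A 00 00 10 at pos 0 carries jc = 0x5A000010 (big-endian),
+// so the original operand is jc - (pos+1) - (0x5A<<24) = 0x0F, written back
+// little-endian as E8 0F 00 00 00.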
+func unfilter49(data []byte, cto8 byte) {
+	cto := uint32(cto8) << 24
+
+	for pos := uint32(0); pos+5 <= uint32(len(data)); pos++ {
+		opcode := data[pos]
+
+		// check for E8 (CALL) or E9 (JMP)
+		if opcode == 0xE8 || opcode == 0xE9 {
+			// check if first byte after opcode matches cto8 marker
+			if data[pos+1] == cto8 {
+				// read operand as big-endian
+				jc := binary.BigEndian.Uint32(data[pos+1 : pos+5])
+				// subtract cto and position to get original relative address
+				result := jc - (pos + 1) - cto
+				// write back as little-endian
+				binary.LittleEndian.PutUint32(data[pos+1:pos+5], result)
+			}
+		}
+
+		// check for conditional jumps (0F 80-8F)
+		if opcode == 0x0F && pos+6 <= uint32(len(data)) {
+			opcode2 := data[pos+1]
+			if opcode2 >= 0x80 && opcode2 <= 0x8F && data[pos+2] == cto8 {
+				jc := binary.BigEndian.Uint32(data[pos+2 : pos+6])
+				result := jc - (pos + 2) - cto
+				binary.LittleEndian.PutUint32(data[pos+2:pos+6], result)
+			}
+		}
+	}
+}
+
+// isUPXCompressed checks if the reader contains a UPX-compressed binary
+func isUPXCompressed(r io.ReaderAt) bool {
+	// UPX magic can be at various offsets depending on the binary format;
+	// scan the first 4KB for the magic bytes
+	buf := make([]byte, 4096)
+	n, err := r.ReadAt(buf, 0)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return false
+	}
+	return bytes.Contains(buf[:n], upxMagic)
+}
+
+// decompressUPX attempts to decompress a UPX-compressed ELF binary.
+// It reads blocks and places them at correct file offsets based on ELF PT_LOAD segments.
+//
+// The first decompressed block contains the original ELF headers. Parse them to get PT_LOAD segment
+// file offsets for proper block placement:
+//
+//   - After decompressing block 1, parse its ELF headers:
+//     ptLoadOffsets := parseELFPTLoadOffsets(block1Data)
+//
+//   - Block 1: placed at offset 0 (contains ELF header + program headers)
+//   - Block 2: placed at offset 0 (overwrites/extends)
+//   - Block 3+: placed at ptLoadOffsets[blockNum-2]
+//
+// Why this matters: simply concatenating decompressed blocks produces invalid output.
+// Each block corresponds to a PT_LOAD segment and must be placed at its correct file offset.
+//
+// Returns the decompressed binary as a bytes.Reader (implements io.ReaderAt).
+func decompressUPX(r io.ReaderAt) (io.ReaderAt, error) {
+	info, err := parseUPXInfo(r)
+	if err != nil {
+		return nil, err
+	}
+
+	// allocate buffer for the full decompressed output
+	output := make([]byte, info.originalSize)
+
+	currentOffset := info.firstBlockOff
+	outputOffset := uint64(0)
+	blockNum := 0
+
+	// track PT_LOAD segment offsets for proper block placement
+	var ptLoadOffsets []uint64
+
+	for {
+		block, err := readBlockInfo(r, currentOffset)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read block info at offset %d: %w", currentOffset, err)
+		}
+
+		// check for end marker (sz_unc == 0)
+		if block.uncompressedSize == 0 {
+			break
+		}
+
+		// a non-LZMA method on the first block is an error; on subsequent blocks it indicates end of data
+		if block.method != upxMethodLZMA {
+			if blockNum == 0 {
+				return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
+			}
+			break
+		}
+		blockNum++
+
+		decompressor, ok := upxDecompressors[block.method]
+		if !ok {
+			return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
+		}
+
+		// read compressed data for this block
+		compressedData := make([]byte, block.compressedSize)
+		_, err = r.ReadAt(compressedData, block.dataOffset)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read compressed data: %w", err)
+		}
+
+		// decompress this block
+		blockData, err := decompressor(compressedData, block.uncompressedSize)
+		if err != nil {
+			return nil, fmt.Errorf("failed to decompress block: %w", err)
+		}
+
+		// apply CTO filter reversal if needed
+		if block.filterID == upxFilterCTO {
+			unfilter49(blockData, block.filterCTO)
+		}
+
+		// first block contains ELF headers - parse PT_LOAD segments for subsequent blocks
+		if blockNum == 1 {
+			ptLoadOffsets = parseELFPTLoadOffsets(blockData)
+		}
+
+		// determine where to place this block in the output
+		destOffset := outputOffset
+		if blockNum > 2 && len(ptLoadOffsets) > blockNum-2 {
+			// blocks 3+ go to their respective PT_LOAD segment offsets
+			destOffset = ptLoadOffsets[blockNum-2]
+		}
+
+		// copy block data to output at correct offset
+		if destOffset+uint64(len(blockData)) <= uint64(len(output)) {
+			copy(output[destOffset:], blockData)
+		}
+
+		outputOffset = destOffset + uint64(block.uncompressedSize)
+		currentOffset = block.dataOffset + int64(block.compressedSize)
+	}
+
+	return bytes.NewReader(output), nil
+}
+
+// parseELFPTLoadOffsets extracts PT_LOAD segment file offsets from ELF headers.
+// These offsets determine where each decompressed block should be placed.
+func parseELFPTLoadOffsets(elfHeader []byte) []uint64 {
+	if len(elfHeader) < 64 {
+		return nil
+	}
+
+	// verify ELF magic
+	if !bytes.HasPrefix(elfHeader, []byte{0x7f, 'E', 'L', 'F'}) {
+		return nil
+	}
+
+	// only support 64-bit ELF
+	if elfHeader[4] != 2 {
+		return nil
+	}
+
+	// parse ELF64 header fields
+	phoff := binary.LittleEndian.Uint64(elfHeader[0x20:0x28])
+	phentsize := binary.LittleEndian.Uint16(elfHeader[0x36:0x38])
+	phnum := binary.LittleEndian.Uint16(elfHeader[0x38:0x3a])
+
+	var offsets []uint64
+	for i := uint16(0); i < phnum; i++ {
+		phStart := phoff + uint64(i)*uint64(phentsize)
+		if phStart+uint64(phentsize) > uint64(len(elfHeader)) {
+			break
+		}
+
+		ph := elfHeader[phStart:]
+		ptype := binary.LittleEndian.Uint32(ph[0:4])
+
+		// PT_LOAD = 1
+		if ptype == 1 {
+			poffset := binary.LittleEndian.Uint64(ph[8:16])
+			offsets = append(offsets, poffset)
+		}
+	}
+
+	return offsets
+}
+
+// parseUPXInfo locates and parses the UPX header information
+func parseUPXInfo(r io.ReaderAt) (*upxInfo, error) {
+	// scan for the UPX! magic in the first 8KB
+	buf := make([]byte, 8192)
+	n, err := r.ReadAt(buf, 0)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return nil, fmt.Errorf("failed to read header: %w", err)
+	}
+
+	magicIdx := bytes.Index(buf[:n], upxMagic)
+	if magicIdx == -1 {
+		return nil, errNotUPX
+	}
+
+	// UPX header structure (after finding "UPX!" magic):
+	//
+	// l_info structure (magic is at offset 4 within l_info):
+	//	offset -4: l_checksum (4 bytes) - checksum of following data
+	//	offset  0: l_magic "UPX!" (4 bytes)
+	//	offset  4: l_lsize (2 bytes) - loader size
+	//	offset  6: l_version (1 byte)
+	//	offset  7: l_format (1 byte)
+	//
+	// p_info structure (12 bytes, starts at magic+8):
+	//	offset  0: p_progid (4 bytes)
+	//	offset  4: p_filesize (4 bytes) - original file size
+	//	offset  8: p_blocksize (4 bytes)
+	//
+	// b_info structures follow (12 bytes each):
+	//	offset  0: sz_unc (4 bytes) - uncompressed size of this block
+	//	offset  4: sz_cpr (4 bytes) - compressed size (may have filter bits)
+	//	offset  8: b_method (1 byte)
+	//	offset  9: b_ftid (1 byte) - filter id
+	//	offset 10: b_cto8 (1 byte) - filter parameter
+	//	offset 11: unused (1 byte)
+
+	if magicIdx+32 > n {
+		return nil, fmt.Errorf("UPX header truncated")
+	}
+
+	lInfoBase := buf[magicIdx:]
+	pInfoBase := buf[magicIdx+8:] // p_info starts 8 bytes after magic
+
+	info := &upxInfo{
+		magicOffset:   int64(magicIdx),
+		version:       lInfoBase[6],
+		format:        lInfoBase[7],
+		originalSize:  binary.LittleEndian.Uint32(pInfoBase[4:8]),
+		blockSize:     binary.LittleEndian.Uint32(pInfoBase[8:12]),
+		firstBlockOff: int64(magicIdx + 8 + 12), // magic + l_info remainder + p_info
+	}
+
+	// sanity check
+	if info.originalSize == 0 || info.originalSize > 500*1024*1024 {
+		return nil, fmt.Errorf("invalid original size: %d", info.originalSize)
+	}
+
+	return info, nil
+}
+
+// readBlockInfo reads a b_info structure at the given offset
+func readBlockInfo(r io.ReaderAt, offset int64) (*blockInfo, error) {
+	buf := make([]byte, 12)
+	_, err := r.ReadAt(buf, offset)
+	if err != nil {
+		return nil, err
+	}
+
+	szUnc := binary.LittleEndian.Uint32(buf[0:4])
+	szCpr := binary.LittleEndian.Uint32(buf[4:8])
+
+	// the compressed size may have filter info in the high bits
+	// for some formats, but for LZMA it's typically clean
+	block := &blockInfo{
+		uncompressedSize: szUnc,
+		compressedSize:   szCpr & 0x00ffffff, // lower 24 bits
+		method:           buf[8],
+		filterID:         buf[9],
+		filterCTO:        buf[10],
+		dataOffset:       offset + 12, // data starts right after b_info
+	}
+
+	return block, nil
+}
+
+// nextPowerOf2 returns the smallest power of 2 >= n
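+// e.g. nextPowerOf2(0) == 1, nextPowerOf2(300) == 512, nextPowerOf2(4096) == 4096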
+func nextPowerOf2(n uint32) uint32 {
+	if n == 0 {
+		return 1
+	}
+	// if already a power of 2, return it
+	if n&(n-1) == 0 {
+		return n
+	}
+	// find the highest set bit and shift left by 1
+	n--
+	n |= n >> 1
+	n |= n >> 2
+	n |= n >> 4
+	n |= n >> 8
+	n |= n >> 16
+	return n + 1
+}
+
+// decompressLZMA decompresses LZMA-compressed data as used by UPX.
+// UPX uses a 2-byte custom header format, not the standard 13-byte LZMA format.
+//
+// UPX 2-byte header encoding:
+//   - Byte 0: (t << 3) | pb, where t = lc + lp
+//   - Byte 1: (lp << 4) | lc
+//   - Byte 2+: raw LZMA stream (starts with 0x00 for range decoder init)
+//
+// Standard LZMA props encoding: props = lc + lp*9 + pb*9*5
+func decompressLZMA(compressedData []byte, uncompressedSize uint32) ([]byte, error) {
+	if len(compressedData) < 3 {
+		return nil, fmt.Errorf("compressed data too short")
+	}
+
+	// parse UPX's 2-byte LZMA header
+	pb := compressedData[0] & 0x07
+	lp := compressedData[1] >> 4
+	lc := compressedData[1] & 0x0f
+
+	// convert to standard LZMA properties byte
+	props := lc + lp*9 + pb*9*5
+
+	// raw LZMA stream starts at byte 2 (includes 0x00 init byte)
+	lzmaStream := compressedData[2:]
+
+	// compute dictionary size: it must be at least as large as the uncompressed size.
+	// use the next power of 2 for efficiency, with reasonable min/max bounds.
+	// note: if small binaries decompress correctly but large ones don't, it may be
+	// that the dictionary size was not computed properly here.
+	const minDictSize = 64 * 1024         // 64KB minimum
+	const maxDictSize = 128 * 1024 * 1024 // 128MB maximum
+	dictSize := nextPowerOf2(uncompressedSize)
+	if dictSize < minDictSize {
+		dictSize = minDictSize
+	}
+	if dictSize > maxDictSize {
+		dictSize = maxDictSize
+	}
+
+	// construct standard 13-byte LZMA header
+	header := make([]byte, 13)
+	header[0] = props //nolint:gosec
+	binary.LittleEndian.PutUint32(header[1:5], dictSize)
+	binary.LittleEndian.PutUint64(header[5:13], uint64(uncompressedSize))
+
+	// combine header + raw stream
+	var fullStream []byte
+	fullStream = append(fullStream, header...)
+	fullStream = append(fullStream, lzmaStream...)
+
+	reader, err := lzma.NewReader(bytes.NewReader(fullStream))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create LZMA reader: %w", err)
+	}
+
+	decompressed := make([]byte, uncompressedSize)
+	_, err = io.ReadFull(reader, decompressed)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decompress LZMA data: %w", err)
+	}
+
+	return decompressed, nil
+}
syft/pkg/cataloger/golang/upx_test.go (new file, 128 lines)
@@ -0,0 +1,128 @@
+package golang
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestIsUPXCompressed(t *testing.T) {
+	tests := []struct {
+		name     string
+		data     []byte
+		expected bool
+	}{
+		{
+			name:     "contains UPX magic at start",
+			data:     append([]byte("UPX!"), make([]byte, 100)...),
+			expected: true,
+		},
+		{
+			name:     "contains UPX magic with offset",
+			data:     append(append(make([]byte, 500), []byte("UPX!")...), make([]byte, 100)...),
+			expected: true,
+		},
+		{
+			name:     "no UPX magic",
+			data:     []byte("\x7FELF" + string(make([]byte, 100))),
+			expected: false,
+		},
+		{
+			name:     "empty data",
+			data:     []byte{},
+			expected: false,
+		},
+		{
+			name:     "partial UPX magic",
+			data:     []byte("UPX"),
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := bytes.NewReader(tt.data)
+			result := isUPXCompressed(reader)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestParseUPXInfo_NotUPX(t *testing.T) {
+	data := []byte("\x7FELF" + string(make([]byte, 100)))
+	reader := bytes.NewReader(data)
+
+	_, err := parseUPXInfo(reader)
+	require.Error(t, err)
+	assert.ErrorIs(t, err, errNotUPX)
+}
+
+func TestParseUPXInfo_ValidHeader(t *testing.T) {
+	// construct a minimal valid UPX header matching the actual format
+	// l_info: checksum (4) + magic (4) + lsize (2) + version (1) + format (1)
+	lInfo := []byte{
+		0, 0, 0, 0, // l_checksum (before magic)
+		'U', 'P', 'X', '!', // magic
+		0, 0, // l_lsize
+		14, // l_version
+		22, // l_format (ELF)
+	}
+
+	// p_info (12 bytes): progid + filesize + blocksize
+	pInfo := []byte{
+		0, 0, 0, 0, // p_progid
+		0, 0, 0x10, 0, // p_filesize = 0x100000 (1MB) little-endian
+		0, 0, 0x10, 0, // p_blocksize
+	}
+
+	// b_info (12 bytes): sz_unc + sz_cpr + method + filter info
+	bInfo := []byte{
+		0, 0, 0x10, 0, // sz_unc = 1MB
+		0, 0, 0x08, 0, // sz_cpr = 512KB (compressed)
+		14, 0, 0, 0, // method=LZMA, filter info
+	}
+
+	data := append(append(lInfo, pInfo...), bInfo...)
+	data = append(data, make([]byte, 100)...) // padding
+
+	reader := bytes.NewReader(data)
+	info, err := parseUPXInfo(reader)
+
+	require.NoError(t, err)
+	assert.Equal(t, uint8(14), info.version)
+	assert.Equal(t, uint8(22), info.format)
+	assert.Equal(t, uint32(0x100000), info.originalSize)
+}
+
+func TestDecompressUPX_UnsupportedMethod(t *testing.T) {
+	// construct a header with an unsupported compression method
+	lInfo := []byte{
+		0, 0, 0, 0, // l_checksum
+		'U', 'P', 'X', '!',
+		0, 0, // l_lsize
+		14, 22, // version, format
+	}
+
+	pInfo := []byte{
+		0, 0, 0, 0, // p_progid
+		0x00, 0x01, 0x00, 0x00, // p_filesize = 256 bytes (small for test)
+		0, 0, 0x10, 0, // p_blocksize
+	}
+
+	bInfo := []byte{
+		0x00, 0x01, 0x00, 0x00, // sz_unc = 256
+		0x80, 0x00, 0x00, 0x00, // sz_cpr = 128
+		99, 0, 0, 0, // unsupported method
+	}
+
+	data := append(append(lInfo, pInfo...), bInfo...)
+	data = append(data, make([]byte, 1000)...)
+
+	reader := bytes.NewReader(data)
+	_, err := decompressUPX(reader)
+
+	require.Error(t, err)
+	assert.ErrorIs(t, err, errUnsupportedUPXMethod)
+}