Decompress UPX-packed binaries to extract golang build info (ELF-formatted binaries, LZMA method only) (#4480)
* decompress upx packed binaries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix linting and remove dead code

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
parent 7ef4703454
commit 0ea920ba6d
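In brief, the change makes the Go binary cataloger fall back to in-memory UPX decompression when a direct build-info read fails. A condensed sketch of that flow (an editorial illustration of the logic in this diff, not the verbatim code; it assumes it sits in the same package as the unexported helpers added below):

package golang

import (
	"debug/buildinfo"
	"io"
)

// readGoBuildInfo sketches the fallback this commit adds to getBuildInfo:
// try a direct read first, and only for UPX-packed binaries decompress and retry.
func readGoBuildInfo(r io.ReaderAt) (*buildinfo.BuildInfo, error) {
	bi, err := buildinfo.Read(r) // works for ordinary Go binaries
	if err == nil {
		return bi, nil
	}
	if isUPXCompressed(r) { // "UPX!" magic found near the start of the file
		if decompressed, dErr := decompressUPX(r); dErr == nil { // LZMA-method ELF only
			if bi, rErr := buildinfo.Read(decompressed); rErr == nil {
				return bi, nil
			}
		}
	}
	return nil, err // fall through with the original error
}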
go.mod (2 changed lines)
@@ -239,7 +239,7 @@ require (
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
-	github.com/ulikunitz/xz v0.5.15 // indirect
+	github.com/ulikunitz/xz v0.5.15
 	github.com/vbatts/tar-split v0.12.2 // indirect
 	github.com/xanzy/ssh-agent v0.3.3 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
@@ -47,6 +47,37 @@ func Test_PackageCataloger_Binary(t *testing.T) {
 				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
 			},
 		},
+		{
+			name:    "upx compressed binary",
+			fixture: "image-small-upx",
+			expectedPkgs: []string{
+				"anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me)",
+			},
+			expectedRels: []string{
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+			},
+		},
 		{
 			name: "partially built binary",
 			// the difference is the build flags used to build the binary... they will not reference the module directly
@@ -32,7 +32,7 @@ func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extend
 
 	var builds []*extendedBuildInfo
 	for _, r := range readers {
-		bi, err := getBuildInfo(r)
+		bi, err := getBuildInfo(r, location)
 		if err != nil {
 			log.WithFields("file", location.RealPath, "error", err).Trace("unable to read golang buildinfo")
@@ -89,7 +89,7 @@ func getCryptoSettingsFromVersion(v version.Version) []string {
 	return cryptoSettings
 }
 
-func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
+func getBuildInfo(r io.ReaderAt, location file.Location) (bi *debug.BuildInfo, err error) {
 	defer func() {
 		if r := recover(); r != nil {
 			// this can happen in cases where a malformed binary is passed in that can be initially parsed, but not
@@ -98,7 +98,25 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 			err = fmt.Errorf("recovered from panic: %v", r)
 		}
 	}()
 
+	// try to read buildinfo from the binary directly
 	bi, err = buildinfo.Read(r)
+	if err == nil {
+		return bi, nil
+	}
+
+	// if direct read fails and this looks like a UPX-compressed binary,
+	// try to decompress and read the buildinfo from the decompressed data
+	if isUPXCompressed(r) {
+		log.WithFields("path", location.RealPath).Trace("detected UPX-compressed Go binary, attempting decompression to read the build info")
+		decompressed, decompErr := decompressUPX(r)
+		if decompErr == nil {
+			bi, err = buildinfo.Read(decompressed)
+			if err == nil {
+				return bi, nil
+			}
+		}
+	}
+
 	// note: the stdlib does not export the error we need to check for
 	if err != nil {
@@ -106,11 +124,11 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 			// since the cataloger can only select executables and not distinguish if they are a go-compiled
 			// binary, we should not show warnings/logs in this case. For this reason we nil-out err here.
 			err = nil
-			return
+			return bi, err
 		}
 		// in this case we could not read or parse the file, but not explicitly because it is not a
 		// go-compiled binary (though it still might be).
-		return
+		return bi, err
 	}
-	return
+	return bi, err
 }
@@ -8,6 +8,8 @@ import (
 
 	"github.com/kastenhq/goversion/version"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/syft/syft/file"
 )
 
 func Test_getBuildInfo(t *testing.T) {
@@ -31,7 +33,7 @@ func Test_getBuildInfo(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			gotBi, err := getBuildInfo(tt.args.r)
+			gotBi, err := getBuildInfo(tt.args.r, file.Location{})
 			if !tt.wantErr(t, err, fmt.Sprintf("getBuildInfo(%v)", tt.args.r)) {
 				return
 			}
syft/pkg/cataloger/golang/test-fixtures/image-small-upx/.gitignore (new file, 1 line, vendored)
@@ -0,0 +1 @@
+/run-me
@@ -0,0 +1,18 @@
+FROM --platform=linux/amd64 golang:1.23.2-alpine AS builder
+
+RUN apk add --no-cache upx
+
+RUN mkdir /app
+WORKDIR /app
+
+COPY go.mod go.sum ./
+RUN go mod download
+COPY main.go main.go
+
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-X main.Version=1.0.0" -o run-me .
+RUN upx --best --lzma --exact run-me
+
+FROM scratch
+
+COPY --from=builder /app/run-me /run-me
+ENTRYPOINT ["/run-me"]
@@ -0,0 +1,19 @@
+module anchore.io/not/real
+
+go 1.23
+
+toolchain go1.23.2
+
+require github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
+
+require (
+	github.com/andybalholm/brotli v1.1.1 // indirect
+	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
+	github.com/golang/snappy v0.0.4 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
+	github.com/klauspost/pgzip v1.2.6 // indirect
+	github.com/nwaples/rardecode v1.1.3 // indirect
+	github.com/pierrec/lz4/v4 v4.1.21 // indirect
+	github.com/ulikunitz/xz v0.5.12 // indirect
+	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
+)
@@ -0,0 +1,28 @@
+github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
+github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
+github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
+github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
+github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
+github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
+github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
+github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
+github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
+github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -0,0 +1,19 @@
+package main
+
+import "github.com/anchore/archiver/v3"
+
+func main() {
+
+	z := archiver.Zip{
+		MkdirAll:               true,
+		SelectiveCompression:   true,
+		ContinueOnError:        false,
+		OverwriteExisting:      false,
+		ImplicitTopLevelFolder: false,
+	}
+
+	err := z.Archive([]string{"main.go"}, "test.zip")
+	if err != nil {
+		panic(err)
+	}
+}
syft/pkg/cataloger/golang/upx.go (new file, 533 lines)
@@ -0,0 +1,533 @@
+package golang
+
+// UPX Decompression Support
+//
+// this file implements decompression of UPX-compressed ELF binaries to enable
+// extraction of Go build information (.go.buildinfo) from packed executables.
+//
+// UPX (Ultimate Packer for eXecutables) is a popular executable packer that
+// compresses binaries to reduce file size. When a Go binary is compressed with
+// UPX, the standard debug/buildinfo.Read() fails because the .go.buildinfo
+// section is compressed. This code decompresses the binary in-memory to allow
+// buildinfo extraction.
+//
+// # Supported Compression Methods
+//
+// Currently only LZMA (method 14) is supported, which is used by:
+//
+//	upx --best --lzma <binary>
+//
+// Other UPX methods (NRV2B, NRV2D, NRV2E, etc.) are not yet implemented but
+// could be added via the upxDecompressors dispatch map.
+//
+// # Key Functions
+//
+//   - isUPXCompressed: detects UPX magic bytes ("UPX!") in the binary
+//   - decompressUPX: main entry point; decompresses all blocks and reconstructs the ELF
+//   - decompressLZMA: handles UPX's custom 2-byte LZMA header format
+//   - unfilter49: reverses the CTO (call trick optimization) filter for x86/x64 code
+//   - parseELFPTLoadOffsets: extracts PT_LOAD segment offsets for proper block placement
+//
+// # UPX Binary Format
+//
+// UPX-compressed binaries contain several header structures followed by compressed blocks:
+//
+//	l_info (at "UPX!" magic):
+//	  - l_checksum (4 bytes before magic)
+//	  - l_magic "UPX!" (4 bytes)
+//	  - l_lsize (2 bytes) - loader size
+//	  - l_version (1 byte)
+//	  - l_format (1 byte)
+//
+//	p_info (12 bytes, follows l_info):
+//	  - p_progid (4 bytes)
+//	  - p_filesize (4 bytes) - original uncompressed file size
+//	  - p_blocksize (4 bytes)
+//
+//	b_info (12 bytes each, one per compressed block):
+//	  - sz_unc (4 bytes) - uncompressed size
+//	  - sz_cpr (4 bytes) - compressed size
+//	  - b_method (1 byte) - compression method (14 = LZMA)
+//	  - b_ftid (1 byte) - filter ID (0x49 = CTO filter)
+//	  - b_cto8 (1 byte) - filter parameter
+//	  - unused (1 byte)
+//
+// # LZMA Header Format
+//
+// UPX uses a 2-byte custom header, NOT the standard 13-byte LZMA format:
+//
+//	Byte 0: (t << 3) | pb, where t = lc + lp
+//	Byte 1: (lp << 4) | lc
+//	Byte 2+: raw LZMA stream
+//
+// This is converted to standard LZMA props: props = lc + lp*9 + pb*9*5
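+//
+// Worked example (editor's note, hypothetical header bytes): for the common
+// parameters lc=3, lp=0, pb=2, byte 0 = ((3+0)<<3)|2 = 0x1A and byte 1 =
+// (0<<4)|3 = 0x03, which decode back to props = 3 + 0*9 + 2*45 = 93 (0x5D),
+// the familiar standard-LZMA properties byte.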
+//
+// # ELF Segment Placement
+//
+// Decompressed blocks must be placed at specific file offsets according to the
+// ELF PT_LOAD segments parsed from the first decompressed block. Simply
+// concatenating blocks produces invalid output.
+//
+// # References
+//
+//   - UPX source: https://github.com/upx/upx
+//   - LZMA format: https://github.com/upx/upx/blob/devel/src/compress/compress_lzma.cpp
+//   - CTO filter: https://github.com/upx/upx/blob/master/src/filter/cto.h
+//
+// note: no code was copied from the UPX repo; this is an independent implementation based on the format description.
+//
+// # Anti-Unpacking / Obfuscation (Not Currently Supported)
+//
+// Malware commonly modifies UPX binaries to evade analysis. This implementation
+// does not currently handle obfuscated binaries, but these techniques could be
+// addressed in the future:
+//
+//   - Magic modification: "UPX!" replaced with custom bytes (e.g., "YTS!", "MOZI").
+//     Recovery: scan for decompression stub code patterns instead of magic bytes.
+//
+//   - Zeroed p_info fields: p_filesize and p_blocksize set to 0.
+//     Recovery: read original size from PackHeader at EOF (last 36 bytes, offset 0x18).
+//
+//   - Header corruption: checksums or version fields modified.
+//     Recovery: ignore validation and use PackHeader values as the authoritative source.
+//
+// This would require parsing the PackHeader, which is not parsed today. Located in the
+// final 36 bytes of the file, it contains metadata recoverable even if p_info is corrupted:
+//
+//	Offset  Size  Field            Description
+//	──────────────────────────────────────────────────────────
+//	0x00    4     magic            UPX magic "UPX!" (0x21585055)
+//	0x04    1     version          UPX version
+//	0x05    1     format           Executable format
+//	0x06    1     method           Compression method
+//	0x07    1     level            Compression level (1-10)
+//	0x08    4     u_adler          Uncompressed data checksum
+//	0x0C    4     c_adler          Compressed data checksum
+//	0x10    4     u_len            Uncompressed length
+//	0x14    4     c_len            Compressed length
+//	0x18    4     u_file_size      Original file size ← recovery point
+//	0x1C    1     filter           Filter ID
+//	0x1D    1     filter_cto       Filter CTO parameter
+//	0x1E    1     n_mru            MRU parameter
+//	0x1F    1     header_checksum  Header checksum
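+//
+// A recovery sketch (editor's illustration, not implemented here; it assumes the
+// PackHeader fields are little-endian like the rest of this file, and fileSize
+// is the total input length):
+//
+//	var tail [36]byte
+//	if _, err := r.ReadAt(tail[:], fileSize-36); err == nil && bytes.HasPrefix(tail[:], upxMagic) {
+//		originalSize := binary.LittleEndian.Uint32(tail[0x18 : 0x18+4])
+//		// use originalSize in place of a zeroed p_filesize
+//	}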
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+
+	"github.com/ulikunitz/xz/lzma"
+)
+
+// UPX compression method constants
+const (
+	upxMethodLZMA uint8 = 14 // M_LZMA in UPX source
+)
+
+// UPX filter constants
+const (
+	upxFilterCTO uint8 = 0x49 // CTO (call trick optimization) filter for x86/x64
+)
+
+var (
+	// upxMagic is the magic bytes that identify a UPX-packed binary
+	upxMagic = []byte("UPX!")
+
+	errNotUPX               = errors.New("not a UPX-compressed binary")
+	errUnsupportedUPXMethod = errors.New("unsupported UPX compression method")
+)
+
+// upxInfo contains parsed UPX header information
+type upxInfo struct {
+	magicOffset   int64
+	version       uint8
+	format        uint8
+	originalSize  uint32 // p_filesize - original uncompressed file size
+	blockSize     uint32 // p_blocksize - size of each compression block
+	firstBlockOff int64  // offset to first b_info structure
+}
+
+// blockInfo contains information about a single compressed block
+type blockInfo struct {
+	uncompressedSize uint32
+	compressedSize   uint32
+	method           uint8
+	filterID         uint8
+	filterCTO        uint8
+	dataOffset       int64
+}
+
+// upxDecompressor is a function that decompresses data using a specific method
+type upxDecompressor func(compressedData []byte, uncompressedSize uint32) ([]byte, error)
+
+// upxDecompressors maps compression methods to their decompressor functions
+var upxDecompressors = map[uint8]upxDecompressor{
+	upxMethodLZMA: decompressLZMA,
+
+	// note: the NRV algorithms are from the UCL library, an open-source implementation based on the NRV (Not Really Vanished) algorithm.
+	// TODO: future methods can be added here
+	// upxMethodNRV2B: decompressNRV2B,
+	// upxMethodNRV2D: decompressNRV2D,
+	// upxMethodNRV2E: decompressNRV2E,
+}
+
+// unfilter49 reverses UPX filter 0x49 (CTO / call trick optimization).
+// The filter transforms CALL (0xE8) and JMP (0xE9) instruction addresses in x86/x64 code to improve compression.
+// The filtered format stores addresses as big-endian with cto8 as the high byte marker (the `cto8` parameter,
+// stored in `b_cto8`, marks transformed instructions):
+//
+//	original: E8 xx xx xx xx (CALL rel32, little-endian offset)
+//	filtered: E8 CC yy yy yy (big-endian, CC = cto8 marker)
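+//
+// Worked example (editor's note, hypothetical bytes): with cto8 = 0x5A, the
+// filtered sequence E8 5A 00 00 10 at pos 0 carries jc = 0x5A000010 (big-endian),
+// so the original operand is jc - (pos+1) - (0x5A<<24) = 0x0F, written back
+// little-endian as E8 0F 00 00 00.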
+func unfilter49(data []byte, cto8 byte) {
+	cto := uint32(cto8) << 24
+
+	for pos := uint32(0); pos+5 <= uint32(len(data)); pos++ {
+		opcode := data[pos]
+
+		// check for E8 (CALL) or E9 (JMP)
+		if opcode == 0xE8 || opcode == 0xE9 {
+			// check if first byte after opcode matches cto8 marker
+			if data[pos+1] == cto8 {
+				// read operand as big-endian
+				jc := binary.BigEndian.Uint32(data[pos+1 : pos+5])
+				// subtract cto and position to get original relative address
+				result := jc - (pos + 1) - cto
+				// write back as little-endian
+				binary.LittleEndian.PutUint32(data[pos+1:pos+5], result)
+			}
+		}
+
+		// check for conditional jumps (0F 80-8F)
+		if opcode == 0x0F && pos+6 <= uint32(len(data)) {
+			opcode2 := data[pos+1]
+			if opcode2 >= 0x80 && opcode2 <= 0x8F && data[pos+2] == cto8 {
+				jc := binary.BigEndian.Uint32(data[pos+2 : pos+6])
+				result := jc - (pos + 2) - cto
+				binary.LittleEndian.PutUint32(data[pos+2:pos+6], result)
+			}
+		}
+	}
+}
+
+// isUPXCompressed checks if the reader contains a UPX-compressed binary
+func isUPXCompressed(r io.ReaderAt) bool {
+	// UPX magic can be at various offsets depending on the binary format;
+	// scan the first 4KB for the magic bytes
+	buf := make([]byte, 4096)
+	n, err := r.ReadAt(buf, 0)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return false
+	}
+	return bytes.Contains(buf[:n], upxMagic)
+}
+
+// decompressUPX attempts to decompress a UPX-compressed ELF binary.
+// It reads blocks and places them at correct file offsets based on ELF PT_LOAD segments.
+//
+// The first decompressed block contains the original ELF headers. Parse them to get PT_LOAD segment
+// file offsets for proper block placement:
+//
+//   - After decompressing block 1, parse its ELF headers:
+//     ptLoadOffsets := parseELFPTLoadOffsets(block1Data)
+//
+//   - Block 1: placed at offset 0 (contains ELF header + program headers)
+//   - Block 2: placed at offset 0 (overwrites/extends)
+//   - Block 3+: placed at ptLoadOffsets[blockNum-2]
+//
+// Why this matters: simply concatenating decompressed blocks produces invalid output.
+// Each block corresponds to a PT_LOAD segment and must be placed at its correct file offset.
+//
+// Returns the decompressed binary as a bytes.Reader (implements io.ReaderAt).
+func decompressUPX(r io.ReaderAt) (io.ReaderAt, error) {
+	info, err := parseUPXInfo(r)
+	if err != nil {
+		return nil, err
+	}
+
+	// allocate buffer for the full decompressed output
+	output := make([]byte, info.originalSize)
+
+	currentOffset := info.firstBlockOff
+	outputOffset := uint64(0)
+	blockNum := 0
+
+	// track PT_LOAD segment offsets for proper block placement
+	var ptLoadOffsets []uint64
+
+	for {
+		block, err := readBlockInfo(r, currentOffset)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read block info at offset %d: %w", currentOffset, err)
+		}
+
+		// check for end marker (sz_unc == 0)
+		if block.uncompressedSize == 0 {
+			break
+		}
+
+		// a non-LZMA method on the first block is an error; on subsequent blocks it indicates end of data
+		if block.method != upxMethodLZMA {
+			if blockNum == 0 {
+				return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
+			}
+			break
+		}
+		blockNum++
+
+		decompressor, ok := upxDecompressors[block.method]
+		if !ok {
+			return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
+		}
+
+		// read compressed data for this block
+		compressedData := make([]byte, block.compressedSize)
+		_, err = r.ReadAt(compressedData, block.dataOffset)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read compressed data: %w", err)
+		}
+
+		// decompress this block
+		blockData, err := decompressor(compressedData, block.uncompressedSize)
+		if err != nil {
+			return nil, fmt.Errorf("failed to decompress block: %w", err)
+		}
+
+		// apply CTO filter reversal if needed
+		if block.filterID == upxFilterCTO {
+			unfilter49(blockData, block.filterCTO)
+		}
+
+		// first block contains ELF headers - parse PT_LOAD segments for subsequent blocks
+		if blockNum == 1 {
+			ptLoadOffsets = parseELFPTLoadOffsets(blockData)
+		}
+
+		// determine where to place this block in the output
+		destOffset := outputOffset
+		if blockNum > 2 && len(ptLoadOffsets) > blockNum-2 {
+			// blocks 3+ go to their respective PT_LOAD segment offsets
+			destOffset = ptLoadOffsets[blockNum-2]
+		}
+
+		// copy block data to output at correct offset
+		if destOffset+uint64(len(blockData)) <= uint64(len(output)) {
+			copy(output[destOffset:], blockData)
+		}
+
+		outputOffset = destOffset + uint64(block.uncompressedSize)
+		currentOffset = block.dataOffset + int64(block.compressedSize)
+	}
+
+	return bytes.NewReader(output), nil
+}
+
+// parseELFPTLoadOffsets extracts PT_LOAD segment file offsets from ELF headers.
+// These offsets determine where each decompressed block should be placed.
+func parseELFPTLoadOffsets(elfHeader []byte) []uint64 {
+	if len(elfHeader) < 64 {
+		return nil
+	}
+
+	// verify ELF magic
+	if !bytes.HasPrefix(elfHeader, []byte{0x7f, 'E', 'L', 'F'}) {
+		return nil
+	}
+
+	// only support 64-bit ELF
+	if elfHeader[4] != 2 {
+		return nil
+	}
+
+	// parse ELF64 header fields
+	phoff := binary.LittleEndian.Uint64(elfHeader[0x20:0x28])
+	phentsize := binary.LittleEndian.Uint16(elfHeader[0x36:0x38])
+	phnum := binary.LittleEndian.Uint16(elfHeader[0x38:0x3a])
+
+	var offsets []uint64
+	for i := uint16(0); i < phnum; i++ {
+		phStart := phoff + uint64(i)*uint64(phentsize)
+		if phStart+uint64(phentsize) > uint64(len(elfHeader)) {
+			break
+		}
+
+		ph := elfHeader[phStart:]
+		ptype := binary.LittleEndian.Uint32(ph[0:4])
+
+		// PT_LOAD = 1
+		if ptype == 1 {
+			poffset := binary.LittleEndian.Uint64(ph[8:16])
+			offsets = append(offsets, poffset)
+		}
+	}
+
+	return offsets
+}
+
+// parseUPXInfo locates and parses the UPX header information
+func parseUPXInfo(r io.ReaderAt) (*upxInfo, error) {
+	// scan for the UPX! magic in the first 8KB
+	buf := make([]byte, 8192)
+	n, err := r.ReadAt(buf, 0)
+	if err != nil && !errors.Is(err, io.EOF) {
+		return nil, fmt.Errorf("failed to read header: %w", err)
+	}
+
+	magicIdx := bytes.Index(buf[:n], upxMagic)
+	if magicIdx == -1 {
+		return nil, errNotUPX
+	}
+
+	// UPX header structure (after finding "UPX!" magic):
+	//
+	// l_info structure (magic is at offset 4 within l_info):
+	//	offset -4: l_checksum (4 bytes) - checksum of following data
+	//	offset  0: l_magic "UPX!" (4 bytes)
+	//	offset  4: l_lsize (2 bytes) - loader size
+	//	offset  6: l_version (1 byte)
+	//	offset  7: l_format (1 byte)
+	//
+	// p_info structure (12 bytes, starts at magic+8):
+	//	offset  0: p_progid (4 bytes)
+	//	offset  4: p_filesize (4 bytes) - original file size
+	//	offset  8: p_blocksize (4 bytes)
+	//
+	// b_info structures follow (12 bytes each):
+	//	offset  0: sz_unc (4 bytes) - uncompressed size of this block
+	//	offset  4: sz_cpr (4 bytes) - compressed size (may have filter bits)
+	//	offset  8: b_method (1 byte)
+	//	offset  9: b_ftid (1 byte) - filter id
+	//	offset 10: b_cto8 (1 byte) - filter parameter
+	//	offset 11: unused (1 byte)
+
+	if magicIdx+32 > n {
+		return nil, fmt.Errorf("UPX header truncated")
+	}
+
+	lInfoBase := buf[magicIdx:]
+	pInfoBase := buf[magicIdx+8:] // p_info starts 8 bytes after magic
+
+	info := &upxInfo{
+		magicOffset:   int64(magicIdx),
+		version:       lInfoBase[6],
+		format:        lInfoBase[7],
+		originalSize:  binary.LittleEndian.Uint32(pInfoBase[4:8]),
+		blockSize:     binary.LittleEndian.Uint32(pInfoBase[8:12]),
+		firstBlockOff: int64(magicIdx + 8 + 12), // magic + l_info remainder + p_info
+	}
+
+	// sanity check
+	if info.originalSize == 0 || info.originalSize > 500*1024*1024 {
+		return nil, fmt.Errorf("invalid original size: %d", info.originalSize)
+	}
+
+	return info, nil
+}
+
+// readBlockInfo reads a b_info structure at the given offset
+func readBlockInfo(r io.ReaderAt, offset int64) (*blockInfo, error) {
+	buf := make([]byte, 12)
+	_, err := r.ReadAt(buf, offset)
+	if err != nil {
+		return nil, err
+	}
+
+	szUnc := binary.LittleEndian.Uint32(buf[0:4])
+	szCpr := binary.LittleEndian.Uint32(buf[4:8])
+
+	// the compressed size may have filter info in the high bits
+	// for some formats, but for LZMA it's typically clean
+	block := &blockInfo{
+		uncompressedSize: szUnc,
+		compressedSize:   szCpr & 0x00ffffff, // lower 24 bits
+		method:           buf[8],
+		filterID:         buf[9],
+		filterCTO:        buf[10],
+		dataOffset:       offset + 12, // data starts right after b_info
+	}
+
+	return block, nil
+}
+
+// nextPowerOf2 returns the smallest power of 2 >= n
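+// e.g. nextPowerOf2(0) == 1, nextPowerOf2(300) == 512, nextPowerOf2(4096) == 4096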
+func nextPowerOf2(n uint32) uint32 {
+	if n == 0 {
+		return 1
+	}
+	// if already a power of 2, return it
+	if n&(n-1) == 0 {
+		return n
+	}
+	// find the highest set bit and shift left by 1
+	n--
+	n |= n >> 1
+	n |= n >> 2
+	n |= n >> 4
+	n |= n >> 8
+	n |= n >> 16
+	return n + 1
+}
+
+// decompressLZMA decompresses LZMA-compressed data as used by UPX.
+// UPX uses a 2-byte custom header format, not the standard 13-byte LZMA format.
+//
+// UPX 2-byte header encoding:
+//   - Byte 0: (t << 3) | pb, where t = lc + lp
+//   - Byte 1: (lp << 4) | lc
+//   - Byte 2+: raw LZMA stream (starts with 0x00 for range decoder init)
+//
+// Standard LZMA props encoding: props = lc + lp*9 + pb*9*5
+func decompressLZMA(compressedData []byte, uncompressedSize uint32) ([]byte, error) {
+	if len(compressedData) < 3 {
+		return nil, fmt.Errorf("compressed data too short")
+	}
+
+	// parse UPX's 2-byte LZMA header
+	pb := compressedData[0] & 0x07
+	lp := compressedData[1] >> 4
+	lc := compressedData[1] & 0x0f
+
+	// convert to standard LZMA properties byte
+	props := lc + lp*9 + pb*9*5
+
+	// raw LZMA stream starts at byte 2 (includes 0x00 init byte)
+	lzmaStream := compressedData[2:]
+
+	// compute dictionary size: it must be at least as large as the uncompressed size.
+	// use the next power of 2 for efficiency, with reasonable min/max bounds.
+	// note: if small binaries decompress correctly but large ones don't, it may be
+	// that the dictionary size was not computed properly here.
+	const minDictSize = 64 * 1024         // 64KB minimum
+	const maxDictSize = 128 * 1024 * 1024 // 128MB maximum
+	dictSize := nextPowerOf2(uncompressedSize)
+	if dictSize < minDictSize {
+		dictSize = minDictSize
+	}
+	if dictSize > maxDictSize {
+		dictSize = maxDictSize
+	}
+
+	// construct standard 13-byte LZMA header
+	header := make([]byte, 13)
+	header[0] = props //nolint:gosec
+	binary.LittleEndian.PutUint32(header[1:5], dictSize)
+	binary.LittleEndian.PutUint64(header[5:13], uint64(uncompressedSize))
+
+	// combine header + raw stream
+	var fullStream []byte
+	fullStream = append(fullStream, header...)
+	fullStream = append(fullStream, lzmaStream...)
+
+	reader, err := lzma.NewReader(bytes.NewReader(fullStream))
+	if err != nil {
+		return nil, fmt.Errorf("failed to create LZMA reader: %w", err)
+	}
+
+	decompressed := make([]byte, uncompressedSize)
+	_, err = io.ReadFull(reader, decompressed)
+	if err != nil {
+		return nil, fmt.Errorf("failed to decompress LZMA data: %w", err)
+	}
+
+	return decompressed, nil
+}
syft/pkg/cataloger/golang/upx_test.go (new file, 128 lines)
@@ -0,0 +1,128 @@
+package golang
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestIsUPXCompressed(t *testing.T) {
+	tests := []struct {
+		name     string
+		data     []byte
+		expected bool
+	}{
+		{
+			name:     "contains UPX magic at start",
+			data:     append([]byte("UPX!"), make([]byte, 100)...),
+			expected: true,
+		},
+		{
+			name:     "contains UPX magic with offset",
+			data:     append(append(make([]byte, 500), []byte("UPX!")...), make([]byte, 100)...),
+			expected: true,
+		},
+		{
+			name:     "no UPX magic",
+			data:     []byte("\x7FELF" + string(make([]byte, 100))),
+			expected: false,
+		},
+		{
+			name:     "empty data",
+			data:     []byte{},
+			expected: false,
+		},
+		{
+			name:     "partial UPX magic",
+			data:     []byte("UPX"),
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			reader := bytes.NewReader(tt.data)
+			result := isUPXCompressed(reader)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestParseUPXInfo_NotUPX(t *testing.T) {
+	data := []byte("\x7FELF" + string(make([]byte, 100)))
+	reader := bytes.NewReader(data)
+
+	_, err := parseUPXInfo(reader)
+	require.Error(t, err)
+	assert.ErrorIs(t, err, errNotUPX)
+}
+
+func TestParseUPXInfo_ValidHeader(t *testing.T) {
+	// construct a minimal valid UPX header matching the actual format
+	// l_info: checksum (4) + magic (4) + lsize (2) + version (1) + format (1)
+	lInfo := []byte{
+		0, 0, 0, 0, // l_checksum (before magic)
+		'U', 'P', 'X', '!', // magic
+		0, 0, // l_lsize
+		14, // l_version
+		22, // l_format (ELF)
+	}
+
+	// p_info (12 bytes): progid + filesize + blocksize
+	pInfo := []byte{
+		0, 0, 0, 0, // p_progid
+		0, 0, 0x10, 0, // p_filesize = 0x100000 (1MB) little-endian
+		0, 0, 0x10, 0, // p_blocksize
+	}
+
+	// b_info (12 bytes): sz_unc + sz_cpr + method + filter info
+	bInfo := []byte{
+		0, 0, 0x10, 0, // sz_unc = 1MB
+		0, 0, 0x08, 0, // sz_cpr = 512KB (compressed)
+		14, 0, 0, 0, // method=LZMA, filter info
+	}
+
+	data := append(append(lInfo, pInfo...), bInfo...)
+	data = append(data, make([]byte, 100)...) // padding
+
+	reader := bytes.NewReader(data)
+	info, err := parseUPXInfo(reader)
+
+	require.NoError(t, err)
+	assert.Equal(t, uint8(14), info.version)
+	assert.Equal(t, uint8(22), info.format)
+	assert.Equal(t, uint32(0x100000), info.originalSize)
+}
+
+func TestDecompressUPX_UnsupportedMethod(t *testing.T) {
+	// construct a header with an unsupported compression method
+	lInfo := []byte{
+		0, 0, 0, 0, // l_checksum
+		'U', 'P', 'X', '!',
+		0, 0, // l_lsize
+		14, 22, // version, format
+	}
+
+	pInfo := []byte{
+		0, 0, 0, 0, // p_progid
+		0x00, 0x01, 0x00, 0x00, // p_filesize = 256 bytes (small for test)
+		0, 0, 0x10, 0, // p_blocksize
+	}
+
+	bInfo := []byte{
+		0x00, 0x01, 0x00, 0x00, // sz_unc = 256
+		0x80, 0x00, 0x00, 0x00, // sz_cpr = 128
+		99, 0, 0, 0, // unsupported method
+	}
+
+	data := append(append(lInfo, pInfo...), bInfo...)
+	data = append(data, make([]byte, 1000)...)
+
+	reader := bytes.NewReader(data)
+	_, err := decompressUPX(reader)
+
+	require.Error(t, err)
+	assert.ErrorIs(t, err, errUnsupportedUPXMethod)
+}