Decompress UPX-packed binaries to extract golang build info (ELF-formatted binaries with the LZMA method only) (#4480)

* decompress upx packed binaries

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* fix linting and remove dead code

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Alex Goodman 2025-12-22 09:17:38 -05:00 committed by GitHub
parent 7ef4703454
commit 0ea920ba6d
11 changed files with 804 additions and 7 deletions

go.mod
View File

@@ -239,7 +239,7 @@ require (
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
-	github.com/ulikunitz/xz v0.5.15 // indirect
+	github.com/ulikunitz/xz v0.5.15
 	github.com/vbatts/tar-split v0.12.2 // indirect
 	github.com/xanzy/ssh-agent v0.3.3 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect

View File

@@ -47,6 +47,37 @@ func Test_PackageCataloger_Binary(t *testing.T) {
 				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
 			},
 		},
+		{
+			name:    "upx compressed binary",
+			fixture: "image-small-upx",
+			expectedPkgs: []string{
+				"anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me)",
+			},
+			expectedRels: []string{
+				"github.com/andybalholm/brotli @ v1.1.1 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/dsnet/compress @ v0.0.2-0.20210315054119-f66993602bf5 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/golang/snappy @ v0.0.4 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/compress @ v1.17.11 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/klauspost/pgzip @ v1.2.6 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/anchore/archiver/v3 @ v3.5.3-0.20241210171143-5b1d8d1c7c51 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/nwaples/rardecode @ v1.1.3 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/pierrec/lz4/v4 @ v4.1.21 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/ulikunitz/xz @ v0.5.12 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"github.com/xi2/xz @ v0.0.0-20171230120015-48954b6210f8 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+				"stdlib @ go1.23.2 (/run-me) [dependency-of] anchore.io/not/real @ v1.0.0 (/run-me)",
+			},
+		},
 		{
 			name: "partially built binary",
 			// the difference is the build flags used to build the binary... they will not reference the module directly

View File

@@ -32,7 +32,7 @@ func scanFile(location file.Location, reader unionreader.UnionReader) ([]*extend
 	var builds []*extendedBuildInfo
 	for _, r := range readers {
-		bi, err := getBuildInfo(r)
+		bi, err := getBuildInfo(r, location)
 		if err != nil {
 			log.WithFields("file", location.RealPath, "error", err).Trace("unable to read golang buildinfo")
@@ -89,7 +89,7 @@ func getCryptoSettingsFromVersion(v version.Version) []string {
 	return cryptoSettings
 }
 
-func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
+func getBuildInfo(r io.ReaderAt, location file.Location) (bi *debug.BuildInfo, err error) {
 	defer func() {
 		if r := recover(); r != nil {
 			// this can happen in cases where a malformed binary is passed in can be initially parsed, but not
@@ -98,7 +98,25 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 			err = fmt.Errorf("recovered from panic: %v", r)
 		}
 	}()
 
+	// try to read buildinfo from the binary directly
 	bi, err = buildinfo.Read(r)
+	if err == nil {
+		return bi, nil
+	}
+
+	// if direct read fails and this looks like a UPX-compressed binary,
+	// try to decompress and read the buildinfo from the decompressed data
+	if isUPXCompressed(r) {
+		log.WithFields("path", location.RealPath).Trace("detected UPX-compressed Go binary, attempting decompression to read the build info")
+		decompressed, decompErr := decompressUPX(r)
+		if decompErr == nil {
+			bi, err = buildinfo.Read(decompressed)
+			if err == nil {
+				return bi, nil
+			}
+		}
+	}
+
 	// note: the stdlib does not export the error we need to check for
 	if err != nil {
@@ -106,11 +124,11 @@ func getBuildInfo(r io.ReaderAt) (bi *debug.BuildInfo, err error) {
 		// since the cataloger can only select executables and not distinguish if they are a go-compiled
 		// binary, we should not show warnings/logs in this case. For this reason we nil-out err here.
 		err = nil
-		return
+		return bi, err
 	}
 	// in this case we could not read the or parse the file, but not explicitly because it is not a
 	// go-compiled binary (though it still might be).
-		return
+		return bi, err
 	}
-	return
+	return bi, err
 }

View File

@@ -8,6 +8,8 @@ import (
 	"github.com/kastenhq/goversion/version"
 	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/syft/syft/file"
 )
 
 func Test_getBuildInfo(t *testing.T) {
@@ -31,7 +33,7 @@ func Test_getBuildInfo(t *testing.T) {
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			gotBi, err := getBuildInfo(tt.args.r)
+			gotBi, err := getBuildInfo(tt.args.r, file.Location{})
 			if !tt.wantErr(t, err, fmt.Sprintf("getBuildInfo(%v)", tt.args.r)) {
 				return
 			}

View File

@@ -0,0 +1 @@
/run-me

View File

@@ -0,0 +1,18 @@
FROM --platform=linux/amd64 golang:1.23.2-alpine AS builder
RUN apk add --no-cache upx
RUN mkdir /app
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY main.go main.go
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags "-X main.Version=1.0.0" -o run-me .
RUN upx --best --lzma --exact run-me
FROM scratch
COPY --from=builder /app/run-me /run-me
ENTRYPOINT ["/run-me"]

View File

@@ -0,0 +1,19 @@
module anchore.io/not/real
go 1.23
toolchain go1.23.2
require github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51
require (
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect
github.com/nwaples/rardecode v1.1.3 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
)

View File

@@ -0,0 +1,28 @@
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51 h1:yhk+P8lF3ZiROjmaVRao9WGTRo4b/wYjoKEiAHWrKwc=
github.com/anchore/archiver/v3 v3.5.3-0.20241210171143-5b1d8d1c7c51/go.mod h1:nwuGSd7aZp0rtYt79YggCGafz1RYsclE7pi3fhLwvuw=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc=
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@@ -0,0 +1,19 @@
package main
import "github.com/anchore/archiver/v3"
func main() {
z := archiver.Zip{
MkdirAll: true,
SelectiveCompression: true,
ContinueOnError: false,
OverwriteExisting: false,
ImplicitTopLevelFolder: false,
}
err := z.Archive([]string{"main.go"}, "test.zip")
if err != nil {
panic(err)
}
}

View File

@@ -0,0 +1,533 @@
package golang
// UPX Decompression Support
//
// this file implements decompression of UPX-compressed ELF binaries to enable
// extraction of Go build information (.go.buildinfo) from packed executables.
//
// UPX (Ultimate Packer for eXecutables) is a popular executable packer that
// compresses binaries to reduce file size. When a Go binary is compressed with
// UPX, the standard debug/buildinfo.Read() fails because the .go.buildinfo
// section is compressed. This code decompresses the binary in-memory to allow
// buildinfo extraction.
//
// # Supported Compression Methods
//
// Currently only LZMA (method 14) is supported, which is used by:
//
// upx --best --lzma <binary>
//
// Other UPX methods (NRV2B, NRV2D, NRV2E, etc.) are not yet implemented but
// could be added via the upxDecompressors dispatch map.
//
// # Key Functions
//
// - isUPXCompressed: detects UPX magic bytes ("UPX!") in the binary
// - decompressUPX: main entry point; decompresses all blocks and reconstructs the ELF
// - decompressLZMA: handles UPX's custom 2-byte LZMA header format
// - unfilter49: reverses the CTO (call trick optimization) filter for x86/x64 code
// - parseELFPTLoadOffsets: extracts PT_LOAD segment offsets for proper block placement
//
// # UPX Binary Format
//
// UPX-compressed binaries contain several header structures followed by compressed blocks:
//
// l_info (at "UPX!" magic):
// - l_checksum (4 bytes before magic)
// - l_magic "UPX!" (4 bytes)
// - l_lsize (2 bytes) - loader size
// - l_version (1 byte)
// - l_format (1 byte)
//
// p_info (12 bytes, follows l_info):
// - p_progid (4 bytes)
// - p_filesize (4 bytes) - original uncompressed file size
// - p_blocksize (4 bytes)
//
// b_info (12 bytes each, one per compressed block):
// - sz_unc (4 bytes) - uncompressed size
// - sz_cpr (4 bytes) - compressed size
// - b_method (1 byte) - compression method (14 = LZMA)
// - b_ftid (1 byte) - filter ID (0x49 = CTO filter)
// - b_cto8 (1 byte) - filter parameter
// - unused (1 byte)
//
// # LZMA Header Format
//
// UPX uses a 2-byte custom header, NOT the standard 13-byte LZMA format:
//
// Byte 0: (t << 3) | pb, where t = lc + lp
// Byte 1: (lp << 4) | lc
// Byte 2+: raw LZMA stream
//
// This is converted to standard LZMA props: props = lc + lp*9 + pb*9*5
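//
// A worked example with the common LZMA defaults lc=3, lp=0, pb=2 (illustrative values only):
//
//	Byte 0 = ((3+0) << 3) | 2 = 0x1A
//	Byte 1 = (0 << 4) | 3 = 0x03
//	props = 3 + 0*9 + 2*45 = 93 (0x5D)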
//
// # ELF Segment Placement
//
// Decompressed blocks must be placed at specific file offsets according to the
// ELF PT_LOAD segments parsed from the first decompressed block. Simply
// concatenating blocks produces invalid output.
//
// # References
//
// - UPX source: https://github.com/upx/upx
// - LZMA format: https://github.com/upx/upx/blob/devel/src/compress/compress_lzma.cpp
// - CTO filter: https://github.com/upx/upx/blob/master/src/filter/cto.h
//
// note: no code was copied from the UPX repo; this is an independent implementation based on the format description.
//
// # Anti-Unpacking / Obfuscation (Not Currently Supported)
//
// Malware commonly modifies UPX binaries to evade analysis. This implementation
// does not currently handle obfuscated binaries, but these techniques could be
// addressed in the future:
//
// - Magic modification: "UPX!" replaced with custom bytes (e.g., "YTS!", "MOZI").
// Recovery: scan for decompression stub code patterns instead of magic bytes.
//
// - Zeroed p_info fields: p_filesize and p_blocksize set to 0.
// Recovery: read original size from PackHeader at EOF (last 36 bytes, offset 0x18).
//
// - Header corruption: checksums or version fields modified.
// Recovery: ignore validation and use PackHeader values as authoritative source.
//
// Recovery in these cases would require parsing the PackHeader: located in the final 36 bytes
// of the file, it contains metadata recoverable even if p_info is corrupted (not parsed today):
//
// Offset Size Field Description
// ──────────────────────────────────────────────────────────
// 0x00 4 UPX magic "UPX!" (0x21585055)
// 0x04 1 version UPX version
// 0x05 1 format Executable format
// 0x06 1 method Compression method
// 0x07 1 level Compression level (1-10)
// 0x08 4 u_adler Uncompressed data checksum
// 0x0C 4 c_adler Compressed data checksum
// 0x10 4 u_len Uncompressed length
// 0x14 4 c_len Compressed length
// 0x18 4 u_file_size Original file size ← Recovery point
// 0x1C 1 filter Filter ID
// 0x1D 1 filter_cto Filter CTO parameter
// 0x1E 1 n_mru MRU parameter
// 0x1F 1 header_checksum Header checksum
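//
// A minimal sketch of that recovery path (hypothetical helper, not part of this
// file; the offsets are those from the table above):
//
//	func packHeaderFileSize(r io.ReaderAt, fileSize int64) (uint32, error) {
//		buf := make([]byte, 36)
//		if _, err := r.ReadAt(buf, fileSize-36); err != nil {
//			return 0, err
//		}
//		if !bytes.Equal(buf[0:4], upxMagic) { // the magic itself may be tampered with
//			return 0, errNotUPX
//		}
//		return binary.LittleEndian.Uint32(buf[0x18:0x1C]), nil // u_file_size
//	}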
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/ulikunitz/xz/lzma"
)
// UPX compression method constants
const (
upxMethodLZMA uint8 = 14 // M_LZMA in UPX source
)
// UPX filter constants
const (
upxFilterCTO uint8 = 0x49 // CTO (call trick optimization) filter for x86/x64
)
var (
// upxMagic is the magic bytes that identify a UPX-packed binary
upxMagic = []byte("UPX!")
errNotUPX = errors.New("not a UPX-compressed binary")
errUnsupportedUPXMethod = errors.New("unsupported UPX compression method")
)
// upxInfo contains parsed UPX header information
type upxInfo struct {
magicOffset int64
version uint8
format uint8
originalSize uint32 // p_filesize - original uncompressed file size
blockSize uint32 // p_blocksize - size of each compression block
firstBlockOff int64 // offset to first b_info structure
}
// blockInfo contains information about a single compressed block
type blockInfo struct {
uncompressedSize uint32
compressedSize uint32
method uint8
filterID uint8
filterCTO uint8
dataOffset int64
}
// upxDecompressor is a function that decompresses data using a specific method
type upxDecompressor func(compressedData []byte, uncompressedSize uint32) ([]byte, error)
// upxDecompressors maps compression methods to their decompressor functions
var upxDecompressors = map[uint8]upxDecompressor{
upxMethodLZMA: decompressLZMA,
// note: the NRV algorithms are from the UCL library, an open-source implementation based on the NRV (Not Really Vanished) algorithm.
// TODO: future methods can be added here
// upxMethodNRV2B: decompressNRV2B,
// upxMethodNRV2D: decompressNRV2D,
// upxMethodNRV2E: decompressNRV2E,
}
// unfilter49 reverses UPX filter 0x49 (CTO / call trick optimization).
// The filter transforms CALL (0xE8) and JMP (0xE9) instruction addresses in x86/x64 code to improve compression.
// The filtered format stores addresses as big-endian with cto8 as the high byte marker (the `cto8` parameter,
// stored in `b_cto8`, marks transformed instructions):
//
// original: E8 xx xx xx xx (CALL rel32, little-endian offset)
// filtered: E8 CC yy yy yy (big-endian, CC = cto8 marker)
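//
// Worked example (hypothetical values): with cto8=0x0F and a filtered CALL at
// pos=0x10 whose operand decodes big-endian to jc=0x0F000123, the restored
// little-endian rel32 is jc - (pos+1) - (0x0F<<24) = 0x123 - 0x11 = 0x112.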
func unfilter49(data []byte, cto8 byte) {
cto := uint32(cto8) << 24
for pos := uint32(0); pos+5 <= uint32(len(data)); pos++ {
opcode := data[pos]
// check for E8 (CALL) or E9 (JMP)
if opcode == 0xE8 || opcode == 0xE9 {
// check if first byte after opcode matches cto8 marker
if data[pos+1] == cto8 {
// read operand as big-endian
jc := binary.BigEndian.Uint32(data[pos+1 : pos+5])
// subtract cto and position to get original relative address
result := jc - (pos + 1) - cto
// write back as little-endian
binary.LittleEndian.PutUint32(data[pos+1:pos+5], result)
}
}
// check for conditional jumps (0F 80-8F)
if opcode == 0x0F && pos+6 <= uint32(len(data)) {
opcode2 := data[pos+1]
if opcode2 >= 0x80 && opcode2 <= 0x8F && data[pos+2] == cto8 {
jc := binary.BigEndian.Uint32(data[pos+2 : pos+6])
result := jc - (pos + 2) - cto
binary.LittleEndian.PutUint32(data[pos+2:pos+6], result)
}
}
}
}
// isUPXCompressed checks if the reader contains a UPX-compressed binary
func isUPXCompressed(r io.ReaderAt) bool {
// UPX magic can be at various offsets depending on the binary format
// scan the first 4KB for the magic bytes
buf := make([]byte, 4096)
n, err := r.ReadAt(buf, 0)
if err != nil && !errors.Is(err, io.EOF) {
return false
}
return bytes.Contains(buf[:n], upxMagic)
}
// decompressUPX attempts to decompress a UPX-compressed ELF binary.
// It reads blocks and places them at correct file offsets based on ELF PT_LOAD segments.
//
// The first decompressed block contains the original ELF headers. Parse them to get PT_LOAD segment
// file offsets for proper block placement:
//
// - After decompressing block 1, parse its ELF headers:
// ptLoadOffsets := parseELFPTLoadOffsets(block1Data)
//
// - Block 1: placed at offset 0 (contains ELF header + program headers)
// - Block 2: placed at offset 0 (overwrites/extends)
// - Block 3+: placed at ptLoadOffsets[blockNum-2]
//
// Why this matters: Simply concatenating decompressed blocks produces invalid output.
// Each block corresponds to a PT_LOAD segment and must be placed at its correct file offset.
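//
// Worked example (hypothetical offsets): if ptLoadOffsets = [0x0, 0xa4000, 0xd0000],
// then block 3 (blockNum=3) is copied to ptLoadOffsets[1] = 0xa4000 and block 4
// to ptLoadOffsets[2] = 0xd0000.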
//
// Returns the decompressed binary as a bytes.Reader (implements io.ReaderAt).
func decompressUPX(r io.ReaderAt) (io.ReaderAt, error) {
info, err := parseUPXInfo(r)
if err != nil {
return nil, err
}
// allocate buffer for the full decompressed output
output := make([]byte, info.originalSize)
currentOffset := info.firstBlockOff
outputOffset := uint64(0)
blockNum := 0
// track PT_LOAD segment offsets for proper block placement
var ptLoadOffsets []uint64
for {
block, err := readBlockInfo(r, currentOffset)
if err != nil {
return nil, fmt.Errorf("failed to read block info at offset %d: %w", currentOffset, err)
}
// check for end marker (sz_unc == 0)
if block.uncompressedSize == 0 {
break
}
// non-LZMA method on first block is an error; on subsequent blocks it indicates end of data
if block.method != upxMethodLZMA {
if blockNum == 0 {
return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
}
break
}
blockNum++
decompressor, ok := upxDecompressors[block.method]
if !ok {
return nil, fmt.Errorf("%w: method %d", errUnsupportedUPXMethod, block.method)
}
// read compressed data for this block
compressedData := make([]byte, block.compressedSize)
_, err = r.ReadAt(compressedData, block.dataOffset)
if err != nil {
return nil, fmt.Errorf("failed to read compressed data: %w", err)
}
// decompress this block
blockData, err := decompressor(compressedData, block.uncompressedSize)
if err != nil {
return nil, fmt.Errorf("failed to decompress block: %w", err)
}
// apply CTO filter reversal if needed
if block.filterID == upxFilterCTO {
unfilter49(blockData, block.filterCTO)
}
// first block contains ELF headers - parse PT_LOAD segments for subsequent blocks
if blockNum == 1 {
ptLoadOffsets = parseELFPTLoadOffsets(blockData)
}
// determine where to place this block in the output
destOffset := outputOffset
if blockNum > 2 && len(ptLoadOffsets) > blockNum-2 {
// blocks 3+ go to their respective PT_LOAD segment offsets
destOffset = ptLoadOffsets[blockNum-2]
}
// copy block data to output at correct offset
if destOffset+uint64(len(blockData)) <= uint64(len(output)) {
copy(output[destOffset:], blockData)
}
outputOffset = destOffset + uint64(block.uncompressedSize)
currentOffset = block.dataOffset + int64(block.compressedSize)
}
return bytes.NewReader(output), nil
}
// parseELFPTLoadOffsets extracts PT_LOAD segment file offsets from ELF headers.
// These offsets determine where each decompressed block should be placed.
func parseELFPTLoadOffsets(elfHeader []byte) []uint64 {
if len(elfHeader) < 64 {
return nil
}
// verify ELF magic
if !bytes.HasPrefix(elfHeader, []byte{0x7f, 'E', 'L', 'F'}) {
return nil
}
// only support 64-bit ELF
if elfHeader[4] != 2 {
return nil
}
// parse ELF64 header fields
phoff := binary.LittleEndian.Uint64(elfHeader[0x20:0x28])
phentsize := binary.LittleEndian.Uint16(elfHeader[0x36:0x38])
phnum := binary.LittleEndian.Uint16(elfHeader[0x38:0x3a])
var offsets []uint64
for i := uint16(0); i < phnum; i++ {
phStart := phoff + uint64(i)*uint64(phentsize)
if phStart+uint64(phentsize) > uint64(len(elfHeader)) {
break
}
ph := elfHeader[phStart:]
ptype := binary.LittleEndian.Uint32(ph[0:4])
// PT_LOAD = 1
if ptype == 1 {
poffset := binary.LittleEndian.Uint64(ph[8:16])
offsets = append(offsets, poffset)
}
}
return offsets
}
// parseUPXInfo locates and parses the UPX header information
func parseUPXInfo(r io.ReaderAt) (*upxInfo, error) {
// scan for the UPX! magic in the first 8KB
buf := make([]byte, 8192)
n, err := r.ReadAt(buf, 0)
if err != nil && !errors.Is(err, io.EOF) {
return nil, fmt.Errorf("failed to read header: %w", err)
}
magicIdx := bytes.Index(buf[:n], upxMagic)
if magicIdx == -1 {
return nil, errNotUPX
}
// UPX header structure (after finding "UPX!" magic):
// l_info structure (magic is at offset 4 within l_info):
// offset -4: l_checksum (4 bytes) - checksum of following data
// offset 0: l_magic "UPX!" (4 bytes)
// offset 4: l_lsize (2 bytes) - loader size
// offset 6: l_version (1 byte)
// offset 7: l_format (1 byte)
//
// p_info structure (12 bytes, starts at magic+8):
// offset 0: p_progid (4 bytes)
// offset 4: p_filesize (4 bytes) - original file size
// offset 8: p_blocksize (4 bytes)
//
// b_info structures follow (12 bytes each):
// offset 0: sz_unc (4 bytes) - uncompressed size of this block
// offset 4: sz_cpr (4 bytes) - compressed size (may have filter bits)
// offset 8: b_method (1 byte)
// offset 9: b_ftid (1 byte) - filter id
// offset 10: b_cto8 (1 byte) - filter parameter
// offset 11: unused (1 byte)
if magicIdx+32 > n {
return nil, fmt.Errorf("UPX header truncated")
}
lInfoBase := buf[magicIdx:]
pInfoBase := buf[magicIdx+8:] // p_info starts 8 bytes after magic
info := &upxInfo{
magicOffset: int64(magicIdx),
version: lInfoBase[6],
format: lInfoBase[7],
originalSize: binary.LittleEndian.Uint32(pInfoBase[4:8]),
blockSize: binary.LittleEndian.Uint32(pInfoBase[8:12]),
firstBlockOff: int64(magicIdx + 8 + 12), // magic + l_info remainder + p_info
}
// sanity check
if info.originalSize == 0 || info.originalSize > 500*1024*1024 {
return nil, fmt.Errorf("invalid original size: %d", info.originalSize)
}
return info, nil
}
// readBlockInfo reads a b_info structure at the given offset
func readBlockInfo(r io.ReaderAt, offset int64) (*blockInfo, error) {
buf := make([]byte, 12)
_, err := r.ReadAt(buf, offset)
if err != nil {
return nil, err
}
szUnc := binary.LittleEndian.Uint32(buf[0:4])
szCpr := binary.LittleEndian.Uint32(buf[4:8])
// the compressed size may have filter info in the high bits
// for some formats, but for LZMA it's typically clean
block := &blockInfo{
uncompressedSize: szUnc,
compressedSize: szCpr & 0x00ffffff, // lower 24 bits
method: buf[8],
filterID: buf[9],
filterCTO: buf[10],
dataOffset: offset + 12, // data starts right after b_info
}
return block, nil
}
// nextPowerOf2 returns the smallest power of 2 >= n
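// e.g. nextPowerOf2(0) == 1, nextPowerOf2(3) == 4, nextPowerOf2(64) == 64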
func nextPowerOf2(n uint32) uint32 {
if n == 0 {
return 1
}
// if already a power of 2, return it
if n&(n-1) == 0 {
return n
}
// find the highest set bit and shift left by 1
n--
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
return n + 1
}
// decompressLZMA decompresses LZMA-compressed data as used by UPX.
// UPX uses a 2-byte custom header format, not the standard 13-byte LZMA format.
//
// UPX 2-byte header encoding:
// - Byte 0: (t << 3) | pb, where t = lc + lp
// - Byte 1: (lp << 4) | lc
// - Byte 2+: raw LZMA stream (starts with 0x00 for range decoder init)
//
// Standard LZMA props encoding: props = lc + lp*9 + pb*9*5
func decompressLZMA(compressedData []byte, uncompressedSize uint32) ([]byte, error) {
if len(compressedData) < 3 {
return nil, fmt.Errorf("compressed data too short")
}
// parse UPX's 2-byte LZMA header
pb := compressedData[0] & 0x07
lp := compressedData[1] >> 4
lc := compressedData[1] & 0x0f
// convert to standard LZMA properties byte
props := lc + lp*9 + pb*9*5
// raw LZMA stream starts at byte 2 (includes 0x00 init byte)
lzmaStream := compressedData[2:]
// compute dictionary size: must be at least as large as uncompressed size
// use next power of 2 for efficiency, with reasonable min/max bounds.
// note: if small binaries decompress correctly but large ones fail, an incorrectly
// computed dictionary size here is a likely culprit.
const minDictSize = 64 * 1024 // 64KB minimum
const maxDictSize = 128 * 1024 * 1024 // 128MB maximum
dictSize := nextPowerOf2(uncompressedSize)
if dictSize < minDictSize {
dictSize = minDictSize
}
if dictSize > maxDictSize {
dictSize = maxDictSize
}
// construct standard 13-byte LZMA header
header := make([]byte, 13)
header[0] = props //nolint:gosec
binary.LittleEndian.PutUint32(header[1:5], dictSize)
binary.LittleEndian.PutUint64(header[5:13], uint64(uncompressedSize))
// combine header + raw stream
var fullStream []byte
fullStream = append(fullStream, header...)
fullStream = append(fullStream, lzmaStream...)
reader, err := lzma.NewReader(bytes.NewReader(fullStream))
if err != nil {
return nil, fmt.Errorf("failed to create LZMA reader: %w", err)
}
decompressed := make([]byte, uncompressedSize)
_, err = io.ReadFull(reader, decompressed)
if err != nil {
return nil, fmt.Errorf("failed to decompress LZMA data: %w", err)
}
return decompressed, nil
}

View File

@@ -0,0 +1,128 @@
package golang
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestIsUPXCompressed(t *testing.T) {
tests := []struct {
name string
data []byte
expected bool
}{
{
name: "contains UPX magic at start",
data: append([]byte("UPX!"), make([]byte, 100)...),
expected: true,
},
{
name: "contains UPX magic with offset",
data: append(append(make([]byte, 500), []byte("UPX!")...), make([]byte, 100)...),
expected: true,
},
{
name: "no UPX magic",
data: []byte("\x7FELF" + string(make([]byte, 100))),
expected: false,
},
{
name: "empty data",
data: []byte{},
expected: false,
},
{
name: "partial UPX magic",
data: []byte("UPX"),
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reader := bytes.NewReader(tt.data)
result := isUPXCompressed(reader)
assert.Equal(t, tt.expected, result)
})
}
}
func TestParseUPXInfo_NotUPX(t *testing.T) {
data := []byte("\x7FELF" + string(make([]byte, 100)))
reader := bytes.NewReader(data)
_, err := parseUPXInfo(reader)
require.Error(t, err)
assert.ErrorIs(t, err, errNotUPX)
}
func TestParseUPXInfo_ValidHeader(t *testing.T) {
// construct a minimal valid UPX header matching actual format
// l_info: checksum (4) + magic (4) + lsize (2) + version (1) + format (1)
lInfo := []byte{
0, 0, 0, 0, // l_checksum (before magic)
'U', 'P', 'X', '!', // magic
0, 0, // l_lsize
14, // l_version
22, // l_format (ELF)
}
// p_info (12 bytes): progid + filesize + blocksize
pInfo := []byte{
0, 0, 0, 0, // p_progid
0, 0, 0x10, 0, // p_filesize = 0x100000 (1MB) little-endian
0, 0, 0x10, 0, // p_blocksize
}
// b_info (12 bytes): sz_unc + sz_cpr + method + filter info
bInfo := []byte{
0, 0, 0x10, 0, // sz_unc = 1MB
0, 0, 0x08, 0, // sz_cpr = 512KB (compressed)
14, 0, 0, 0, // method=LZMA, filter info
}
data := append(append(lInfo, pInfo...), bInfo...)
data = append(data, make([]byte, 100)...) // padding
reader := bytes.NewReader(data)
info, err := parseUPXInfo(reader)
require.NoError(t, err)
assert.Equal(t, uint8(14), info.version)
assert.Equal(t, uint8(22), info.format)
assert.Equal(t, uint32(0x100000), info.originalSize)
}
func TestDecompressUPX_UnsupportedMethod(t *testing.T) {
// construct a header with an unsupported compression method
lInfo := []byte{
0, 0, 0, 0, // l_checksum
'U', 'P', 'X', '!',
0, 0, // l_lsize
14, 22, // version, format
}
pInfo := []byte{
0, 0, 0, 0, // p_progid
0x00, 0x01, 0x00, 0x00, // p_filesize = 256 bytes (small for test)
0, 0, 0x10, 0, // p_blocksize
}
bInfo := []byte{
0x00, 0x01, 0x00, 0x00, // sz_unc = 256
0x80, 0x00, 0x00, 0x00, // sz_cpr = 128
99, 0, 0, 0, // unsupported method
}
data := append(append(lInfo, pInfo...), bInfo...)
data = append(data, make([]byte, 1000)...)
reader := bytes.NewReader(data)
_, err := decompressUPX(reader)
require.Error(t, err)
assert.ErrorIs(t, err, errUnsupportedUPXMethod)
}