From 758324b3e8e7e5dc4a55434309e497ec9d7bd93e Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Wed, 22 Apr 2026 09:06:03 -0700 Subject: [PATCH] fix: propagate non-EOF errors out of safeCopy (#4807) Signed-off-by: SAY-5 --- internal/file/copy.go | 14 ++++++++- internal/file/copy_test.go | 64 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 internal/file/copy_test.go diff --git a/internal/file/copy.go b/internal/file/copy.go index d569dcc4c..058fe27eb 100644 --- a/internal/file/copy.go +++ b/internal/file/copy.go @@ -12,8 +12,20 @@ const perFileReadLimit = 2 * GB // protect against decompression bomb attacks. func safeCopy(writer io.Writer, reader io.Reader) error { numBytes, err := io.Copy(writer, io.LimitReader(reader, perFileReadLimit)) - if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) { + if numBytes >= perFileReadLimit { return fmt.Errorf("zip read limit hit (potential decompression bomb attack)") } + // Propagate decompression / read errors up to the caller. io.Copy + // on the happy path returns (n, nil); the only way err is non-nil + // here is that the underlying reader surfaced a real failure + // ("flate: corrupt input before offset X" on a mangled ZIP entry, + // a network-backed reader erroring mid-stream, etc.). The previous + // implementation dropped that error and the caller stored a + // partial / empty buffer as a "successful" extract, which silently + // downgraded Java cataloger output and caused SBOM scanners to + // miss known CVEs (#4806). + if err != nil && !errors.Is(err, io.EOF) { + return fmt.Errorf("failed to read archive entry: %w", err) + } return nil } diff --git a/internal/file/copy_test.go b/internal/file/copy_test.go new file mode 100644 index 000000000..0c83d266f --- /dev/null +++ b/internal/file/copy_test.go @@ -0,0 +1,64 @@ +package file + +import ( + "bytes" + "errors" + "io" + "strings" + "testing" +) + +// errReader returns a deterministic non-EOF error after emitting some +// bytes, mimicking what compress/flate does when it hits a corrupt +// stream mid-entry. +type errReader struct { + data []byte + err error + off int +} + +func (r *errReader) Read(p []byte) (int, error) { + if r.off >= len(r.data) { + return 0, r.err + } + n := copy(p, r.data[r.off:]) + r.off += n + return n, nil +} + +func TestSafeCopy(t *testing.T) { + t.Run("clean copy returns nil", func(t *testing.T) { + var buf bytes.Buffer + if err := safeCopy(&buf, strings.NewReader("hello")); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got := buf.String(); got != "hello" { + t.Fatalf("unexpected buffer contents: %q", got) + } + }) + + t.Run("propagates decompression error", func(t *testing.T) { + // #4806: safeCopy used to drop non-EOF errors, so the caller + // would persist a partial buffer as a successful extract and + // downstream catalogers silently read empty manifests. + sentinel := errors.New("flate: corrupt input before offset 42") + var buf bytes.Buffer + err := safeCopy(&buf, &errReader{data: []byte("partial"), err: sentinel}) + if err == nil { + t.Fatalf("expected error to be returned, got nil") + } + if !errors.Is(err, sentinel) { + t.Fatalf("error does not wrap sentinel: %v", err) + } + }) + + t.Run("EOF is not treated as an error", func(t *testing.T) { + // The old code had a dead io.EOF branch that labelled clean + // reads as decompression bombs; keep the happy path clean. + var buf bytes.Buffer + err := safeCopy(&buf, io.LimitReader(strings.NewReader("abc"), 3)) + if err != nil { + t.Fatalf("unexpected error on clean copy: %v", err) + } + }) +}