fix: propagate non-EOF errors out of safeCopy (#4807)

Signed-off-by: SAY-5 <SAY-5@users.noreply.github.com>
This commit is contained in:
Sai Asish Y 2026-04-22 09:06:03 -07:00 committed by GitHub
parent 390cf6cce0
commit 758324b3e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 77 additions and 1 deletions

View File

@ -12,8 +12,20 @@ const perFileReadLimit = 2 * GB
// protect against decompression bomb attacks.
func safeCopy(writer io.Writer, reader io.Reader) error {
numBytes, err := io.Copy(writer, io.LimitReader(reader, perFileReadLimit))
if numBytes >= perFileReadLimit || errors.Is(err, io.EOF) {
if numBytes >= perFileReadLimit {
return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
}
// Propagate decompression / read errors up to the caller. io.Copy
// on the happy path returns (n, nil); the only way err is non-nil
// here is that the underlying reader surfaced a real failure
// ("flate: corrupt input before offset X" on a mangled ZIP entry,
// a network-backed reader erroring mid-stream, etc.). The previous
// implementation dropped that error and the caller stored a
// partial / empty buffer as a "successful" extract, which silently
// downgraded Java cataloger output and caused SBOM scanners to
// miss known CVEs (#4806).
if err != nil && !errors.Is(err, io.EOF) {
return fmt.Errorf("failed to read archive entry: %w", err)
}
return nil
}

View File

@ -0,0 +1,64 @@
package file
import (
"bytes"
"errors"
"io"
"strings"
"testing"
)
// errReader returns a deterministic non-EOF error after emitting some
// bytes, mimicking what compress/flate does when it hits a corrupt
// stream mid-entry.
type errReader struct {
data []byte
err error
off int
}
func (r *errReader) Read(p []byte) (int, error) {
if r.off >= len(r.data) {
return 0, r.err
}
n := copy(p, r.data[r.off:])
r.off += n
return n, nil
}
func TestSafeCopy(t *testing.T) {
t.Run("clean copy returns nil", func(t *testing.T) {
var buf bytes.Buffer
if err := safeCopy(&buf, strings.NewReader("hello")); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got := buf.String(); got != "hello" {
t.Fatalf("unexpected buffer contents: %q", got)
}
})
t.Run("propagates decompression error", func(t *testing.T) {
// #4806: safeCopy used to drop non-EOF errors, so the caller
// would persist a partial buffer as a successful extract and
// downstream catalogers silently read empty manifests.
sentinel := errors.New("flate: corrupt input before offset 42")
var buf bytes.Buffer
err := safeCopy(&buf, &errReader{data: []byte("partial"), err: sentinel})
if err == nil {
t.Fatalf("expected error to be returned, got nil")
}
if !errors.Is(err, sentinel) {
t.Fatalf("error does not wrap sentinel: %v", err)
}
})
t.Run("EOF is not treated as an error", func(t *testing.T) {
// The old code had a dead io.EOF branch that labelled clean
// reads as decompression bombs; keep the happy path clean.
var buf bytes.Buffer
err := safeCopy(&buf, io.LimitReader(strings.NewReader("abc"), 3))
if err != nil {
t.Fatalf("unexpected error on clean copy: %v", err)
}
})
}