syft/syft/internal/unionreader/union_reader.go
Will Murphy e38851143e
chore: centralize temp files and prefer streaming IO (#4668)
* chore: centralize temp files and prefer streaming IO

Catalogers that create temp files ad-hoc can easily forget cleanup,
leaking files on disk. Similarly, io.ReadAll is convenient but risks
OOM on large or malicious inputs.

Introduce internal/tmpdir to manage all cataloger temp storage under
a single root directory with automatic cleanup. Prefer streaming
parsers (bufio.Scanner, json/yaml.NewDecoder, io.LimitReader) over
buffering entire inputs into memory. Add ruleguard rules to enforce
both practices going forward.

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* chore: go back to old release parsing

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* simplify to limit reader in version check

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* chore: regex change postponed

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* simplify supplement release to limitreader

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

---------

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
2026-03-18 10:53:51 -04:00

139 lines
3.2 KiB
Go

package unionreader
import (
"bytes"
"fmt"
"io"
"sync"
"github.com/diskfs/go-diskfs/filesystem/squashfs"
macho "github.com/anchore/go-macholibre"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
)
// UnionReader bundles every reading capability the multi-arch binary
// catalogers need into one interface: sequential reads, random-access reads,
// seeking, and closing.
type UnionReader interface {
	io.ReadSeekCloser
	io.ReaderAt
}
// GetReaders returns the io.ReaderAt views that should be processed for f.
// A universal (multi-architecture) Mach-O binary yields one reader per
// extracted entry; any other input yields a single-element slice holding f
// itself. An extraction failure is logged at debug level and returned.
func GetReaders(f UnionReader) ([]io.ReaderAt, error) {
	// Common case first: not a universal binary, so f is the only reader.
	if !macho.IsUniversalMachoBinary(f) {
		return []io.ReaderAt{f}, nil
	}

	extracted, err := macho.ExtractReaders(f)
	if err != nil {
		log.Debugf("extracting readers: %v", err)
		return nil, err
	}

	var out []io.ReaderAt
	for i := range extracted {
		out = append(out, extracted[i].Reader)
	}
	return out, nil
}
// GetUnionReader converts readerCloser into a UnionReader, trying the
// cheapest option first:
//
//   - a value that already implements UnionReader is returned unchanged;
//   - a file.LocationReadCloser is unwrapped and its embedded ReadCloser is
//     resolved by the same rules;
//   - a *squashfs.File is wrapped in an adapter that adds ReadAt;
//   - anything else is read fully into memory and served from that buffer.
//
// In the buffered case, Close on the returned value is a no-op and does not
// close readerCloser.
func GetUnionReader(readerCloser io.ReadCloser) (UnionReader, error) {
	switch rc := readerCloser.(type) {
	case UnionReader:
		return rc, nil
	case file.LocationReadCloser:
		// file.LocationReadCloser embeds a ReadCloser, which is likely to
		// implement UnionReader itself, so resolve the embedded value.
		return GetUnionReader(rc.ReadCloser)
	case *squashfs.File:
		// Seeking is implemented, but not io.ReaderAt. Wrap the file rather
		// than degrading performance by copying all of its data.
		return newReaderAtAdapter(rc), nil
	}

	contents, err := io.ReadAll(readerCloser) //nolint:gocritic // buffering non-seekable to ReaderAt
	if err != nil {
		return nil, fmt.Errorf("unable to read contents from binary: %w", err)
	}

	// One bytes.Reader backs all three embedded interfaces.
	buffered := bytes.NewReader(contents)
	return struct {
		io.ReadCloser
		io.ReaderAt
		io.Seeker
	}{
		ReadCloser: io.NopCloser(buffered),
		ReaderAt:   buffered,
		Seeker:     buffered,
	}, nil
}
// readerAtAdapter adds io.ReaderAt to a value that only supports stateful
// reading and seeking. ReadAt is emulated by seeking to the requested offset,
// reading, and then seeking back to where the cursor was. Read, Seek and
// ReadAt all take the same mutex so they cannot interleave while the cursor
// is temporarily moved.
type readerAtAdapter struct {
	io.ReadSeekCloser
	mu *sync.Mutex
}

// newReaderAtAdapter wraps rs so that it satisfies UnionReader.
func newReaderAtAdapter(rs io.ReadSeekCloser) UnionReader {
	return &readerAtAdapter{
		ReadSeekCloser: rs,
		mu:             &sync.Mutex{},
	}
}

// Read reads from the current position of the wrapped reader while holding
// the adapter's lock.
func (r *readerAtAdapter) Read(p []byte) (n int, err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.ReadSeekCloser.Read(p)
}

// Seek moves the current position of the wrapped reader while holding the
// adapter's lock.
func (r *readerAtAdapter) Seek(offset int64, whence int) (int64, error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.ReadSeekCloser.Seek(offset, whence)
}

// ReadAt reads len(p) bytes starting at absolute offset off and then restores
// the position used by Read and Seek.
//
// It keeps reading until p is full or the wrapped reader returns an error, so
// n < len(p) is always accompanied by a non-nil error (io.EOF at end of
// input), as io.ReaderAt requires. If the read succeeds but restoring the
// position fails, the restore error is returned; a read error takes
// precedence over a restore error.
func (r *readerAtAdapter) ReadAt(p []byte, off int64) (n int, err error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	currentPos, err := r.ReadSeekCloser.Seek(0, io.SeekCurrent) // save current pos
	if err != nil {
		return 0, err
	}

	_, err = r.ReadSeekCloser.Seek(off, io.SeekStart) // seek to absolute position `off`
	if err != nil {
		return 0, err
	}

	// A single Read may legitimately return fewer bytes than requested with a
	// nil error; ReadAt may not, so loop until p is full or an error occurs.
	for n < len(p) && err == nil {
		var m int
		m, err = r.ReadSeekCloser.Read(p[n:])
		n += m
	}

	// restore the position for the stateful read/seek operations
	if restoreErr := r.restorePosition(currentPos); restoreErr != nil {
		if err == nil {
			err = restoreErr
		}
	}
	return n, err
}

// restorePosition seeks the wrapped reader back to the absolute position pos.
// The caller must already hold r.mu.
func (r *readerAtAdapter) restorePosition(pos int64) error {
	_, err := r.ReadSeekCloser.Seek(pos, io.SeekStart)
	return err
}