mirror of
https://github.com/anchore/syft.git
synced 2025-11-18 00:43:20 +01:00
very WIP: lazy union reader
Signed-off-by: Will Murphy <will.murphy@anchore.com>
This commit is contained in:
parent
6440f26b5a
commit
20a26a0dfe
142
syft/internal/unionreader/lazy_union_reader.go
Normal file
142
syft/internal/unionreader/lazy_union_reader.go
Normal file
@ -0,0 +1,142 @@
|
||||
package unionreader
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// readSize is 1 MiB (1024 * 1024 bytes).
// NOTE(review): nothing in the visible part of this file references it —
// presumably the intended chunk size for reads from the underlying reader;
// confirm against the rest of the package or remove.
const readSize int64 = 1024 * 1024
|
||||
|
||||
// compile-time assertion that *lazyUnionReader implements UnionReader
|
||||
var _ UnionReader = (*lazyUnionReader)(nil)
|
||||
|
||||
// lazyUnionReader wraps an io.ReadCloser to make it into a logical ReadSeeker
// and ReaderAt without reading the whole stream up front.
// The reader maintains a []byte, which is everything that has been read so far;
// more bytes are pulled from the underlying reader only when a caller asks for
// an offset that has not been buffered yet.
// Otherwise, callers needing a ReadSeeker might copy the entire reader into
// a buffer in order to have a seeker.
type lazyUnionReader struct {
	buf    []byte        // the bytes that have been read so far
	cursor int64         // the current position where Read() will take place
	done   bool          // whether we have seen EOF from rc
	rc     io.ReadCloser // the underlying reader
	mu     sync.Mutex    // exported methods must acquire this lock before changing any field. Unexported methods assume their caller acquired the lock
}

// Read copies up to len(p) bytes starting at the cursor into p and advances
// the cursor by the number of bytes copied.
//
// It returns io.EOF (possibly together with n > 0) when fewer than len(p)
// bytes remain, including when the cursor was moved past the end of the data
// by Seek. Any other error from the underlying reader is returned with n == 0;
// bytes consumed before that error stay buffered for later calls.
func (c *lazyUnionReader) Read(p []byte) (n int, err error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if len(p) == 0 {
		return 0, nil
	}

	err = c.ensureReadUntil(c.cursor + int64(len(p)))
	if err != nil && !errors.Is(err, io.EOF) {
		return 0, err
	}

	// Seek may legally place the cursor beyond the end of the data; slicing
	// the buffer there would panic, so report EOF instead.
	if c.cursor >= c.maxRead() {
		return 0, io.EOF
	}

	n = copy(p, c.buf[c.cursor:])
	c.cursor += int64(n)
	if err != nil {
		// only EOF-like errors reach this point; hand callers the sentinel
		// itself because many of them compare with == rather than errors.Is.
		err = io.EOF
	}
	return n, err
}

// ReadAt copies up to len(p) bytes starting at absolute offset off into p
// without moving the cursor.
//
// A negative offset is rejected with an error. When fewer than len(p) bytes
// are available at off, the bytes that do exist are copied and a non-nil
// error (io.EOF at end of data) is returned.
func (c *lazyUnionReader) ReadAt(p []byte, off int64) (n int, err error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if off < 0 {
		return 0, fmt.Errorf("unable to read at negative offset %d", off)
	}

	err = c.ensureReadUntil(off + int64(len(p)))

	// the offset lies beyond everything the underlying reader could provide
	if off > c.maxRead() {
		if err == nil {
			err = io.EOF
		}
		return 0, err
	}

	return copy(p, c.buf[off:]), err
}

// Seek sets the cursor used by the next Read and returns the new absolute
// position. Seeking relative to the end drains the underlying reader first so
// that the total length is known. Positions past the end are allowed; a
// negative resulting position or an unknown whence is an error and leaves the
// cursor untouched.
func (c *lazyUnionReader) Seek(offset int64, whence int) (int64, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	var target int64
	switch whence {
	case io.SeekStart:
		target = offset
	case io.SeekCurrent:
		target = c.cursor + offset
	case io.SeekEnd:
		if err := c.readAll(); err != nil {
			return 0, err
		}
		target = c.maxRead() + offset
	default:
		return 0, fmt.Errorf("unable to seek: invalid whence %d", whence)
	}

	if target < 0 {
		return 0, fmt.Errorf("request to read negative offset impossible %v", target)
	}

	c.cursor = target
	return c.cursor, nil
}

// Close closes the underlying reader.
func (c *lazyUnionReader) Close() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.rc.Close()
}

// readAll drains the rest of the underlying reader into the buffer.
// The caller must hold c.mu.
func (c *lazyUnionReader) readAll() error {
	if c.done {
		return nil
	}

	rest, err := io.ReadAll(c.rc)
	// keep whatever was consumed even on failure: those bytes are gone from
	// the stream, so dropping them would shift every later offset.
	c.buf = append(c.buf, rest...)
	if err != nil && !errors.Is(err, io.EOF) {
		return err
	}

	c.done = true
	return nil
}

// ensureReadUntil buffers bytes from the underlying reader until at least
// offset bytes are available. It returns nil when they are, io.EOF when the
// stream ended first, or the underlying read error. The caller must hold c.mu.
func (c *lazyUnionReader) ensureReadUntil(offset int64) error {
	missing := offset - c.maxRead()
	if missing <= 0 {
		return nil
	}
	if c.done {
		// the stream is already exhausted; do not poll it again
		return io.EOF
	}

	// the bytes.Buffer takes over c.buf and appends in place, so there is no
	// intermediate copy and partial reads survive an error.
	sink := bytes.NewBuffer(c.buf)
	_, err := io.CopyN(sink, c.rc, missing)
	c.buf = sink.Bytes()

	if errors.Is(err, io.EOF) {
		c.done = true
	}
	return err
}

// maxRead returns the number of bytes buffered so far, i.e. the highest
// offset that can be served without touching the underlying reader.
func (c *lazyUnionReader) maxRead() int64 {
	return int64(len(c.buf))
}
|
||||
|
||||
// max returns the largest of the given values, or 0 when called with no
// arguments. The running maximum is seeded from the first argument rather
// than from 0, so inputs that are all negative are handled correctly.
func max(ints ...int64) int64 {
	if len(ints) == 0 {
		return 0
	}
	maxSeen := ints[0]
	for _, in := range ints[1:] {
		if in > maxSeen {
			maxSeen = in
		}
	}
	return maxSeen
}
|
||||
|
||||
// min returns the smallest of the given values. With no arguments it returns
// math.MaxInt64, the value the search starts from.
func min(ints ...int64) int64 {
	// math.MaxInt64 is an untyped constant, so the variable is given its
	// int64 type explicitly.
	var smallest int64 = math.MaxInt64
	for i := range ints {
		if ints[i] < smallest {
			smallest = ints[i]
		}
	}
	return smallest
}
|
||||
|
||||
func newLazyUnionReader(readCloser io.ReadCloser) (UnionReader, error) {
|
||||
return &lazyUnionReader{
|
||||
rc: readCloser,
|
||||
mu: sync.Mutex{},
|
||||
}, nil
|
||||
}
|
||||
281
syft/internal/unionreader/lazy_union_reader_test.go
Normal file
281
syft/internal/unionreader/lazy_union_reader_test.go
Normal file
@ -0,0 +1,281 @@
|
||||
package unionreader
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// spyingCloser is a test double: it reads from the embedded io.Reader and
// records whether Close has been called.
type spyingCloser struct {
	closed bool // flipped to true by Close
	io.Reader
}

// Close marks the spy as closed; it never fails.
func (sc *spyingCloser) Close() error {
	sc.closed = true
	return nil
}
|
||||
|
||||
func Test_lazyUnionReader_Close(t *testing.T) {
|
||||
r := strings.NewReader("some string")
|
||||
sc := &spyingCloser{
|
||||
false,
|
||||
r,
|
||||
}
|
||||
subject, err := newLazyUnionReader(sc)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, subject.Close())
|
||||
assert.True(t, sc.closed)
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_ReadAll(t *testing.T) {
|
||||
rc := io.NopCloser(strings.NewReader("some data"))
|
||||
subject, err := newLazyUnionReader(rc)
|
||||
require.NoError(t, err)
|
||||
|
||||
bytes, err := io.ReadAll(subject)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "some data", string(bytes))
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_RepeatedlyRead(t *testing.T) {
|
||||
data := "some data for our reader that we need to read!"
|
||||
rc := io.NopCloser(strings.NewReader(data))
|
||||
subject, err := newLazyUnionReader(rc)
|
||||
require.NoError(t, err)
|
||||
var readErr error
|
||||
var readResult []byte
|
||||
for readErr == nil {
|
||||
buf := make([]byte, 2)
|
||||
var n int
|
||||
n, readErr = subject.Read(buf)
|
||||
readResult = append(readResult, buf[:n]...)
|
||||
}
|
||||
assert.Equal(t, data, string(readResult))
|
||||
assert.ErrorIs(t, readErr, io.EOF)
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_ReadAt(t *testing.T) {
|
||||
readStart := make([]byte, 4)
|
||||
readMid := make([]byte, 4)
|
||||
readEnd := make([]byte, 4)
|
||||
tests := []struct {
|
||||
name string
|
||||
dst []byte
|
||||
off int64
|
||||
wantN int
|
||||
wantBytes []byte
|
||||
wantEOF bool
|
||||
}{
|
||||
{
|
||||
name: "read first 4 bytes",
|
||||
dst: readStart,
|
||||
off: 0,
|
||||
wantN: 4,
|
||||
wantBytes: []byte("0123"),
|
||||
},
|
||||
{
|
||||
name: "read 4 bytes from middle",
|
||||
dst: readMid,
|
||||
off: 4,
|
||||
wantN: 4,
|
||||
wantBytes: []byte("4567"),
|
||||
},
|
||||
{
|
||||
name: "read last 4 bytes",
|
||||
dst: readEnd,
|
||||
off: 12,
|
||||
wantN: 4,
|
||||
wantBytes: []byte("cdef"),
|
||||
},
|
||||
{
|
||||
name: "read past end",
|
||||
dst: make([]byte, 4),
|
||||
off: 14,
|
||||
wantN: 2,
|
||||
wantBytes: []byte("ef"),
|
||||
wantEOF: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rc := io.NopCloser(strings.NewReader("0123456789abcdef"))
|
||||
subject, err := newLazyUnionReader(rc)
|
||||
require.NoError(t, err)
|
||||
n, err := subject.ReadAt(tt.dst, tt.off)
|
||||
assert.Equal(t, tt.wantN, n)
|
||||
assert.Equal(t, string(tt.wantBytes), string(tt.dst[:tt.wantN]))
|
||||
if tt.wantEOF {
|
||||
assert.ErrorIs(t, err, io.EOF)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_Seek(t *testing.T) {
|
||||
//const seek = 0
|
||||
//const read = 1
|
||||
type command struct {
|
||||
seekOffset int64
|
||||
seekWhence int
|
||||
readDst []byte
|
||||
}
|
||||
data := []byte("this is a string of data that I'm very excited to share")
|
||||
tests := []struct {
|
||||
name string
|
||||
commands []command
|
||||
wantBytes []byte
|
||||
wantEOF bool
|
||||
}{
|
||||
{
|
||||
name: "read the first 4 bytes twice",
|
||||
commands: []command{
|
||||
{
|
||||
readDst: make([]byte, 4),
|
||||
},
|
||||
{
|
||||
seekOffset: 0,
|
||||
seekWhence: io.SeekStart,
|
||||
},
|
||||
{
|
||||
readDst: make([]byte, 4),
|
||||
},
|
||||
},
|
||||
wantBytes: []byte("thisthis"),
|
||||
},
|
||||
{
|
||||
name: "read the last 4 bytes twice",
|
||||
commands: []command{
|
||||
{
|
||||
seekWhence: io.SeekEnd,
|
||||
seekOffset: -4,
|
||||
},
|
||||
{
|
||||
readDst: make([]byte, 4),
|
||||
},
|
||||
{
|
||||
seekWhence: io.SeekEnd,
|
||||
seekOffset: -4,
|
||||
},
|
||||
{
|
||||
readDst: make([]byte, 4),
|
||||
},
|
||||
},
|
||||
wantBytes: []byte("harehare"),
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
rc := io.NopCloser(bytes.NewReader(data))
|
||||
subject, err := newLazyUnionReader(rc)
|
||||
require.NoError(t, err)
|
||||
var readSeekErr error
|
||||
var readResult []byte
|
||||
for _, c := range tt.commands {
|
||||
var n int
|
||||
if len(c.readDst) > 0 {
|
||||
n, readSeekErr = subject.Read(c.readDst)
|
||||
readResult = append(readResult, c.readDst[:n]...)
|
||||
} else {
|
||||
_, readSeekErr = subject.Seek(c.seekOffset, c.seekWhence)
|
||||
}
|
||||
}
|
||||
if tt.wantEOF {
|
||||
assert.ErrorIs(t, readSeekErr, io.EOF)
|
||||
}
|
||||
assert.Equal(t, string(tt.wantBytes), string(readResult))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_ensureReadUntil(t *testing.T) {
|
||||
type fields struct {
|
||||
buf []byte
|
||||
cursor int64
|
||||
maxRead int64
|
||||
done bool
|
||||
rc io.ReadCloser
|
||||
mu sync.Mutex
|
||||
}
|
||||
type args struct {
|
||||
offset int64
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
fields fields
|
||||
args args
|
||||
wantErr assert.ErrorAssertionFunc
|
||||
}{
|
||||
// TODO: Add test cases.
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
c := &lazyUnionReader{
|
||||
buf: tt.fields.buf,
|
||||
cursor: tt.fields.cursor,
|
||||
done: tt.fields.done,
|
||||
rc: tt.fields.rc,
|
||||
mu: tt.fields.mu,
|
||||
}
|
||||
tt.wantErr(t, c.ensureReadUntil(tt.args.offset), fmt.Sprintf("ensureReadUntil(%v)", tt.args.offset))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_lazyUnionReader_readAll(t *testing.T) {
|
||||
type fields struct {
|
||||
buf []byte
|
||||
cursor int64
|
||||
maxRead int64
|
||||
done bool
|
||||
rc io.ReadCloser
|
||||
mu sync.Mutex
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
fields fields
|
||||
wantErr assert.ErrorAssertionFunc
|
||||
}{
|
||||
// TODO: Add test cases.
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
c := &lazyUnionReader{
|
||||
buf: tt.fields.buf,
|
||||
cursor: tt.fields.cursor,
|
||||
done: tt.fields.done,
|
||||
rc: tt.fields.rc,
|
||||
mu: tt.fields.mu,
|
||||
}
|
||||
tt.wantErr(t, c.readAll(), fmt.Sprintf("readAll()"))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_newLazyUnionReader(t *testing.T) {
|
||||
type args struct {
|
||||
readCloser io.ReadCloser
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want UnionReader
|
||||
wantErr assert.ErrorAssertionFunc
|
||||
}{
|
||||
// TODO: Add test cases.
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := newLazyUnionReader(tt.args.readCloser)
|
||||
if !tt.wantErr(t, err, fmt.Sprintf("newLazyUnionReader(%v)", tt.args.readCloser)) {
|
||||
return
|
||||
}
|
||||
assert.Equalf(t, tt.want, got, "newLazyUnionReader(%v)", tt.args.readCloser)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1,12 +1,9 @@
|
||||
package unionreader
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
macho "github.com/anchore/go-macholibre"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"io"
|
||||
)
|
||||
|
||||
// UnionReader is a single interface with all reading functions needed by multi-arch binary catalogers
|
||||
@ -43,23 +40,24 @@ func GetUnionReader(readerCloser io.ReadCloser) (UnionReader, error) {
|
||||
if ok {
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(readerCloser)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read contents from binary: %w", err)
|
||||
}
|
||||
|
||||
bytesReader := bytes.NewReader(b)
|
||||
|
||||
reader = struct {
|
||||
io.ReadCloser
|
||||
io.ReaderAt
|
||||
io.Seeker
|
||||
}{
|
||||
ReadCloser: io.NopCloser(bytesReader),
|
||||
ReaderAt: bytesReader,
|
||||
Seeker: bytesReader,
|
||||
}
|
||||
|
||||
return reader, nil
|
||||
return newLazyUnionReader(readerCloser)
|
||||
//
|
||||
//b, err := io.ReadAll(readerCloser)
|
||||
//if err != nil {
|
||||
// return nil, fmt.Errorf("unable to read contents from binary: %w", err)
|
||||
//}
|
||||
//
|
||||
//bytesReader := bytes.NewReader(b)
|
||||
//
|
||||
//reader = struct {
|
||||
// io.ReadCloser
|
||||
// io.ReaderAt
|
||||
// io.Seeker
|
||||
//}{
|
||||
// ReadCloser: io.NopCloser(bytesReader),
|
||||
// ReaderAt: bytesReader,
|
||||
// Seeker: bytesReader,
|
||||
//}
|
||||
//
|
||||
//return reader, nil
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user