diff --git a/syft/internal/unionreader/lazy_union_reader.go b/syft/internal/unionreader/lazy_union_reader.go
new file mode 100644
index 000000000..a6ce9a033
--- /dev/null
+++ b/syft/internal/unionreader/lazy_union_reader.go
@@ -0,0 +1,185 @@
+package unionreader
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"sync"
+)
+
+// readSize caps how many bytes are requested from the underlying reader in one
+// call, so a far-away offset grows the buffer in bounded steps instead of
+// allocating the whole range up front.
+const readSize int64 = 1024 * 1024
+
+// lazyUnionReader must implement UnionReader
+var _ UnionReader = (*lazyUnionReader)(nil)
+
+// lazyUnionReader wraps an io.ReadCloser to make it into a logical ReadSeeker.
+// The reader maintains a []byte, which is everything that has been read so far;
+// more is pulled from the underlying reader only when a call needs an offset
+// that is not buffered yet. Otherwise, callers needing a ReadSeeker might copy
+// the entire reader into a buffer in order to have a seeker.
+//
+// Exported methods must acquire mu before touching any field. Unexported
+// methods assume their caller acquired the lock.
+type lazyUnionReader struct {
+	buf    []byte        // the bytes that have been read so far
+	cursor int64         // the current position where Read() will take place
+	done   bool          // whether we have seen EOF from rc
+	rc     io.ReadCloser // the underlying reader
+	mu     sync.Mutex    // guards every field above
+}
+
+// Read copies up to len(p) bytes starting at the cursor into p and advances the
+// cursor, pulling from the underlying reader only if the buffer is too short.
+// Like bytes.Reader, it reports io.EOF with n == 0 once the cursor is at or
+// past the end of the stream.
+func (c *lazyUnionReader) Read(p []byte) (int, error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if len(p) == 0 {
+		return 0, nil
+	}
+	err := c.ensureReadUntil(c.cursor + int64(len(p)))
+	if err != nil && !errors.Is(err, io.EOF) {
+		return 0, err
+	}
+	// Seek may legally leave the cursor past the end of the stream, so slicing
+	// buf at the cursor is only safe after this check
+	if c.cursor >= c.maxRead() {
+		return 0, io.EOF
+	}
+	n := copy(p, c.buf[c.cursor:])
+	c.cursor += int64(n)
+	return n, nil
+}
+
+// ReadAt copies len(p) bytes starting at absolute offset off into p without
+// moving the cursor. As io.ReaderAt requires, a short read always comes with a
+// non-nil error (io.EOF when the stream ended first).
+func (c *lazyUnionReader) ReadAt(p []byte, off int64) (int, error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if off < 0 {
+		return 0, fmt.Errorf("request to read negative offset impossible %v", off)
+	}
+	if len(p) == 0 {
+		return 0, nil
+	}
+	err := c.ensureReadUntil(off + int64(len(p)))
+	if off >= c.maxRead() {
+		// nothing buffered at off: slicing buf there would panic
+		if err == nil {
+			err = io.EOF
+		}
+		return 0, err
+	}
+	n := copy(p, c.buf[off:])
+	if n == len(p) {
+		return n, nil
+	}
+	if err == nil {
+		err = io.EOF
+	}
+	return n, err
+}
+
+// Seek moves the cursor used by Read. io.SeekEnd has to buffer the rest of the
+// underlying reader to learn where the end is. Seeking past the end is allowed;
+// a negative resulting position or an unknown whence is an error.
+func (c *lazyUnionReader) Seek(offset int64, whence int) (int64, error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	var base int64
+	switch whence {
+	case io.SeekStart:
+		// offset is already absolute
+	case io.SeekCurrent:
+		base = c.cursor
+	case io.SeekEnd:
+		if err := c.readAll(); err != nil {
+			return 0, err
+		}
+		base = c.maxRead()
+	default:
+		return 0, fmt.Errorf("invalid whence %v", whence)
+	}
+	trueOffset := base + offset
+	if trueOffset < 0 {
+		return 0, fmt.Errorf("request to read negative offset impossible %v", trueOffset)
+	}
+	c.cursor = trueOffset
+	return c.cursor, nil
+}
+
+// Close closes the underlying reader. Bytes that are already buffered stay
+// readable; the result of reading further depends on the underlying reader.
+func (c *lazyUnionReader) Close() error {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.rc.Close()
+}
+
+// readAll buffers everything that is left in the underlying reader.
+func (c *lazyUnionReader) readAll() error {
+	if c.done {
+		return nil
+	}
+	rest, err := io.ReadAll(c.rc)
+	// keep what was consumed even on failure: dropping it would shift the
+	// offset of every byte read afterwards
+	c.buf = append(c.buf, rest...)
+	if err != nil {
+		return err
+	}
+	c.done = true
+	return nil
+}
+
+// ensureReadUntil buffers the underlying reader until buf holds at least offset
+// bytes. It returns nil when that many bytes are buffered, io.EOF when the
+// stream ended first, and any other read error as is.
+func (c *lazyUnionReader) ensureReadUntil(offset int64) error {
+	if offset < 0 {
+		// the caller's offset arithmetic overflowed, so the request reaches
+		// past any representable position: buffer the whole stream
+		return c.readAll()
+	}
+	for !c.done && c.maxRead() < offset {
+		want := offset - c.maxRead()
+		if want > readSize {
+			want = readSize
+		}
+		start := len(c.buf)
+		c.buf = append(c.buf, make([]byte, want)...)
+		n, err := c.rc.Read(c.buf[start:])
+		// keep exactly the bytes that were delivered, even next to an error
+		c.buf = c.buf[:start+n]
+		switch {
+		case errors.Is(err, io.EOF):
+			c.done = true
+		case err != nil:
+			return err
+		}
+	}
+	if c.maxRead() < offset {
+		return io.EOF
+	}
+	return nil
+}
+
+// maxRead is the number of bytes buffered so far, i.e. the first offset that
+// has not been read from the underlying reader yet.
+func (c *lazyUnionReader) maxRead() int64 {
+	return int64(len(c.buf))
+}
+
+// newLazyUnionReader wraps readCloser in a UnionReader that reads from it on
+// demand. Closing the result closes readCloser.
+func newLazyUnionReader(readCloser io.ReadCloser) (UnionReader, error) {
+	if readCloser == nil {
+		return nil, errors.New("unable to create union reader from nil reader")
+	}
+	return &lazyUnionReader{rc: readCloser}, nil
+}
diff --git a/syft/internal/unionreader/lazy_union_reader_test.go b/syft/internal/unionreader/lazy_union_reader_test.go
new file mode 100644
index 000000000..44c27bc46
--- /dev/null
+++ b/syft/internal/unionreader/lazy_union_reader_test.go
@@ -0,0 +1,261 @@
+package unionreader
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"strings"
+	"testing"
+	"testing/iotest"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+type spyingCloser struct {
+	closed bool
+	io.Reader
+}
+
+func (s *spyingCloser) Close() error {
+	s.closed = true
+	return nil
+}
+
+func Test_lazyUnionReader_Close(t *testing.T) {
+	r := strings.NewReader("some string")
+	sc := &spyingCloser{
+		false,
+		r,
+	}
+	subject, err := newLazyUnionReader(sc)
+	require.NoError(t, err)
+	require.NoError(t, subject.Close())
+	assert.True(t, sc.closed)
+}
+
+func Test_lazyUnionReader_ReadAll(t *testing.T) {
+	rc := io.NopCloser(strings.NewReader("some data"))
+	subject, err := newLazyUnionReader(rc)
+	require.NoError(t, err)
+
+	contents, err := io.ReadAll(subject)
+	require.NoError(t, err)
+	assert.Equal(t, "some data", string(contents))
+}
+
+func Test_lazyUnionReader_RepeatedlyRead(t *testing.T) {
+	data := "some data for our reader that we need to read!"
+	rc := io.NopCloser(strings.NewReader(data))
+	subject, err := newLazyUnionReader(rc)
+	require.NoError(t, err)
+	var readErr error
+	var readResult []byte
+	for readErr == nil {
+		buf := make([]byte, 2)
+		var n int
+		n, readErr = subject.Read(buf)
+		readResult = append(readResult, buf[:n]...)
+	}
+	assert.Equal(t, data, string(readResult))
+	assert.ErrorIs(t, readErr, io.EOF)
+}
+
+func Test_lazyUnionReader_ReadAt(t *testing.T) {
+	tests := []struct {
+		name      string
+		dst       []byte
+		off       int64
+		wantN     int
+		wantBytes []byte
+		wantEOF   bool
+	}{
+		{
+			name:      "read first 4 bytes",
+			dst:       make([]byte, 4),
+			off:       0,
+			wantN:     4,
+			wantBytes: []byte("0123"),
+		},
+		{
+			name:      "read 4 bytes from middle",
+			dst:       make([]byte, 4),
+			off:       4,
+			wantN:     4,
+			wantBytes: []byte("4567"),
+		},
+		{
+			name:      "read last 4 bytes",
+			dst:       make([]byte, 4),
+			off:       12,
+			wantN:     4,
+			wantBytes: []byte("cdef"),
+		},
+		{
+			name:      "read past end",
+			dst:       make([]byte, 4),
+			off:       14,
+			wantN:     2,
+			wantBytes: []byte("ef"),
+			wantEOF:   true,
+		},
+		{
+			name:    "read starting at end",
+			dst:     make([]byte, 4),
+			off:     16,
+			wantEOF: true,
+		},
+		{
+			name:    "read starting beyond end",
+			dst:     make([]byte, 4),
+			off:     100,
+			wantEOF: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rc := io.NopCloser(strings.NewReader("0123456789abcdef"))
+			subject, err := newLazyUnionReader(rc)
+			require.NoError(t, err)
+			n, err := subject.ReadAt(tt.dst, tt.off)
+			assert.Equal(t, tt.wantN, n)
+			assert.Equal(t, string(tt.wantBytes), string(tt.dst[:tt.wantN]))
+			if tt.wantEOF {
+				assert.ErrorIs(t, err, io.EOF)
+			} else {
+				assert.NoError(t, err)
+			}
+		})
+	}
+}
+
+func Test_lazyUnionReader_ReadAt_negativeOffset(t *testing.T) {
+	subject, err := newLazyUnionReader(io.NopCloser(strings.NewReader("0123")))
+	require.NoError(t, err)
+
+	n, err := subject.ReadAt(make([]byte, 2), -1)
+	assert.Zero(t, n)
+	assert.Error(t, err)
+}
+
+func Test_lazyUnionReader_Seek(t *testing.T) {
+	type command struct {
+		seekOffset int64
+		seekWhence int
+		readDst    []byte
+	}
+	data := []byte("this is a string of data that I'm very excited to share")
+	tests := []struct {
+		name      string
+		commands  []command
+		wantBytes []byte
+		wantEOF   bool
+	}{
+		{
+			name: "read the first 4 bytes twice",
+			commands: []command{
+				{
+					readDst: make([]byte, 4),
+				},
+				{
+					seekOffset: 0,
+					seekWhence: io.SeekStart,
+				},
+				{
+					readDst: make([]byte, 4),
+				},
+			},
+			wantBytes: []byte("thisthis"),
+		},
+		{
+			name: "read the last 4 bytes twice",
+			commands: []command{
+				{
+					seekWhence: io.SeekEnd,
+					seekOffset: -4,
+				},
+				{
+					readDst: make([]byte, 4),
+				},
+				{
+					seekWhence: io.SeekEnd,
+					seekOffset: -4,
+				},
+				{
+					readDst: make([]byte, 4),
+				},
+			},
+			wantBytes: []byte("harehare"),
+		},
+		{
+			name: "read after seeking past the end",
+			commands: []command{
+				{
+					seekWhence: io.SeekEnd,
+					seekOffset: 10,
+				},
+				{
+					readDst: make([]byte, 4),
+				},
+			},
+			wantEOF: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rc := io.NopCloser(bytes.NewReader(data))
+			subject, err := newLazyUnionReader(rc)
+			require.NoError(t, err)
+			var readSeekErr error
+			var readResult []byte
+			for _, c := range tt.commands {
+				var n int
+				if len(c.readDst) > 0 {
+					n, readSeekErr = subject.Read(c.readDst)
+					readResult = append(readResult, c.readDst[:n]...)
+				} else {
+					_, readSeekErr = subject.Seek(c.seekOffset, c.seekWhence)
+				}
+			}
+			if tt.wantEOF {
+				assert.ErrorIs(t, readSeekErr, io.EOF)
+			} else {
+				assert.NoError(t, readSeekErr)
+			}
+			assert.Equal(t, string(tt.wantBytes), string(readResult))
+		})
+	}
+}
+
+func Test_lazyUnionReader_Seek_invalid(t *testing.T) {
+	subject, err := newLazyUnionReader(io.NopCloser(strings.NewReader("0123")))
+	require.NoError(t, err)
+
+	_, err = subject.Seek(0, 42)
+	assert.Error(t, err, "unknown whence")
+
+	_, err = subject.Seek(-1, io.SeekStart)
+	assert.Error(t, err, "negative position")
+}
+
+func Test_lazyUnionReader_keepsBytesReadBeforeError(t *testing.T) {
+	errBroken := errors.New("broken")
+	rc := io.NopCloser(io.MultiReader(strings.NewReader("abc"), iotest.ErrReader(errBroken)))
+	subject, err := newLazyUnionReader(rc)
+	require.NoError(t, err)
+
+	_, err = subject.Read(make([]byte, 8))
+	require.ErrorIs(t, err, errBroken)
+
+	// the bytes consumed before the failure must still sit at their offsets
+	dst := make([]byte, 3)
+	n, err := subject.ReadAt(dst, 0)
+	require.NoError(t, err)
+	assert.Equal(t, 3, n)
+	assert.Equal(t, "abc", string(dst))
+}
+
+func Test_newLazyUnionReader_nilReader(t *testing.T) {
+	_, err := newLazyUnionReader(nil)
+	assert.Error(t, err)
+}
diff --git a/syft/internal/unionreader/union_reader.go b/syft/internal/unionreader/union_reader.go
index 86a493ad0..811e71b67 100644
--- a/syft/internal/unionreader/union_reader.go
+++ b/syft/internal/unionreader/union_reader.go
@@ -1,12 +1,10 @@
 package unionreader
 
 import (
-	"bytes"
-	"fmt"
 	"io"
 
 	macho "github.com/anchore/go-macholibre"
 	"github.com/anchore/syft/internal/log"
 )
 
 // UnionReader is a single interface with all reading functions needed by multi-arch binary catalogers
@@ -43,23 +41,5 @@ func GetUnionReader(readerCloser io.ReadCloser) (UnionReader, error) {
 	if ok {
 		return reader, nil
 	}
-
-	b, err := io.ReadAll(readerCloser)
-	if err != nil {
-		return nil, fmt.Errorf("unable to read contents from binary: %w", err)
-	}
-
-	bytesReader := bytes.NewReader(b)
-
-	reader = struct {
-		io.ReadCloser
-		io.ReaderAt
-		io.Seeker
-	}{
-		ReadCloser: io.NopCloser(bytesReader),
-		ReaderAt:   bytesReader,
-		Seeker:     bytesReader,
-	}
-
-	return reader, nil
+	return newLazyUnionReader(readerCloser)
 }