From 0fe13888d50ce847d2fd77c14a0f55259ef054f8 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 23 Jan 2024 16:44:57 -0500 Subject: [PATCH] swap format readseekers for readers (#2515) Signed-off-by: Alex Goodman --- syft/format/cyclonedxjson/decoder.go | 18 ++-- syft/format/cyclonedxxml/decoder.go | 16 +-- syft/format/decoders.go | 8 +- syft/format/internal/cyclonedxutil/decoder.go | 12 ++- .../format/internal/stream/seekable_reader.go | 24 +++++ .../internal/stream/seekable_reader_test.go | 101 ++++++++++++++++++ syft/format/spdxjson/decoder.go | 16 +-- syft/format/spdxtagvalue/decoder.go | 13 ++- syft/format/syftjson/decoder.go | 19 ++-- syft/sbom/format.go | 4 +- 10 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 syft/format/internal/stream/seekable_reader.go create mode 100644 syft/format/internal/stream/seekable_reader_test.go diff --git a/syft/format/cyclonedxjson/decoder.go b/syft/format/cyclonedxjson/decoder.go index 5fba5b9a6..0f8665a37 100644 --- a/syft/format/cyclonedxjson/decoder.go +++ b/syft/format/cyclonedxjson/decoder.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/format/common/cyclonedxhelpers" "github.com/anchore/syft/syft/format/internal/cyclonedxutil" + "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/sbom" ) @@ -25,10 +26,12 @@ func NewFormatDecoder() sbom.FormatDecoder { } } -func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { - if reader == nil { - return nil, "", "", fmt.Errorf("no SBOM bytes provided") +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + reader, err := stream.SeekableReader(r) + if err != nil { + return nil, "", "", err } + id, version := d.Identify(reader) if id != ID { return nil, "", "", fmt.Errorf("not a cyclonedx json document") @@ -50,10 +53,12 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string return s, 
id, version, nil } -func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { - if reader == nil { +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + reader, err := stream.SeekableReader(r) + if err != nil { return "", "" } + if _, err := reader.Seek(0, io.SeekStart); err != nil { log.Debugf("unable to seek to start of CycloneDX JSON SBOM: %+v", err) return "", "" @@ -68,8 +73,7 @@ func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { dec := json.NewDecoder(reader) var doc Document - err := dec.Decode(&doc) - if err != nil { + if err = dec.Decode(&doc); err != nil { // maybe not json? maybe not valid? doesn't matter, we won't process it. return "", "" } diff --git a/syft/format/cyclonedxxml/decoder.go b/syft/format/cyclonedxxml/decoder.go index 4e9df4a65..b6f026421 100644 --- a/syft/format/cyclonedxxml/decoder.go +++ b/syft/format/cyclonedxxml/decoder.go @@ -11,6 +11,7 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/format/common/cyclonedxhelpers" "github.com/anchore/syft/syft/format/internal/cyclonedxutil" + "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/sbom" ) @@ -26,9 +27,10 @@ func NewFormatDecoder() sbom.FormatDecoder { } } -func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { - if reader == nil { - return nil, "", "", fmt.Errorf("no SBOM bytes provided") +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + reader, err := stream.SeekableReader(r) + if err != nil { + return nil, "", "", err } id, version := d.Identify(reader) @@ -52,8 +54,9 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string return s, id, version, nil } -func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { - if reader == nil { +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + reader, err := stream.SeekableReader(r) + if err != nil { 
return "", "" } @@ -69,8 +72,7 @@ func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { dec := xml.NewDecoder(reader) var doc Document - err := dec.Decode(&doc) - if err != nil { + if err = dec.Decode(&doc); err != nil { // maybe not xml? maybe not valid? doesn't matter, we won't process it. return "", "" } diff --git a/syft/format/decoders.go b/syft/format/decoders.go index f7e0d6a2d..93afc3406 100644 --- a/syft/format/decoders.go +++ b/syft/format/decoders.go @@ -42,7 +42,7 @@ func NewDecoderCollection(decoders ...sbom.FormatDecoder) sbom.FormatDecoder { } // Decode takes a set of bytes and attempts to decode it into an SBOM relative to the decoders in the collection. -func (c *DecoderCollection) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { +func (c *DecoderCollection) Decode(reader io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { if reader == nil { return nil, "", "", fmt.Errorf("no SBOM bytes provided") } @@ -67,7 +67,7 @@ func (c *DecoderCollection) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.Forma } // Identify takes a set of bytes and attempts to identify the format of the SBOM relative to the decoders in the collection. -func (c *DecoderCollection) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { +func (c *DecoderCollection) Identify(reader io.Reader) (sbom.FormatID, string) { if reader == nil { return "", "" } @@ -81,11 +81,11 @@ func (c *DecoderCollection) Identify(reader io.ReadSeeker) (sbom.FormatID, strin } // Identify takes a set of bytes and attempts to identify the format of the SBOM. -func Identify(reader io.ReadSeeker) (sbom.FormatID, string) { +func Identify(reader io.Reader) (sbom.FormatID, string) { return staticDecoders.Identify(reader) } // Decode takes a set of bytes and attempts to decode it into an SBOM. 
// SeekableReader returns an io.ReadSeeker for the given reader.
//
// If reader already implements io.ReadSeeker it is returned unchanged; its
// current offset is not modified, so callers that need to read from the
// beginning must seek there themselves. Otherwise the remaining content of
// reader is read fully into memory and a *bytes.Reader over that content is
// returned.
//
// An error is returned when reader is nil or when reading the content fails.
// Read failures wrap the underlying error, so it can still be matched with
// errors.Is or errors.As.
func SeekableReader(reader io.Reader) (io.ReadSeeker, error) {
	if reader == nil {
		return nil, fmt.Errorf("no bytes provided")
	}

	// avoid copying the content when the caller already handed us something seekable
	if r, ok := reader.(io.ReadSeeker); ok {
		return r, nil
	}

	// the reader cannot seek: buffer everything so the result can be rewound
	content, err := io.ReadAll(reader)
	if err != nil {
		return nil, fmt.Errorf("unable to read bytes: %w", err)
	}

	return bytes.NewReader(content), nil
}
mode 100644 index 000000000..d1bc3577a --- /dev/null +++ b/syft/format/internal/stream/seekable_reader_test.go @@ -0,0 +1,101 @@ +package stream + +import ( + "bytes" + "io" + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSeekableReader(t *testing.T) { + tests := []struct { + name string + input io.Reader + assert func(io.Reader, io.ReadSeeker) + wantErr require.ErrorAssertionFunc + }{ + { + name: "nil reader", + input: nil, + wantErr: require.Error, + }, + { + name: "empty reader", + input: bytes.NewBuffer([]byte{}), // does not implement io.Seeker (but does implement io.Reader) + assert: func(input io.Reader, got io.ReadSeeker) { + impl, ok := got.(*bytes.Reader) // implements bytes.Reader + require.True(t, ok) + _, err := impl.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(impl) + require.NoError(t, err) + require.Equal(t, []byte{}, content) + }, + }, + { + name: "empty read seeker", + input: bytes.NewReader([]byte{}), // implements io.ReadSeeker + assert: func(input io.Reader, got io.ReadSeeker) { + impl, ok := got.(*bytes.Reader) + require.True(t, ok) + _, err := impl.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(impl) + require.NoError(t, err) + require.Equal(t, []byte{}, content) + + // assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer + inputImpl, ok := input.(*bytes.Reader) + require.True(t, ok) + assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer()) + }, + }, + { + name: "non-empty read seeker", + input: bytes.NewReader([]byte("hello world!")), // implements io.ReadSeeker + assert: func(input io.Reader, got io.ReadSeeker) { + impl, ok := got.(*bytes.Reader) + require.True(t, ok) + _, err := impl.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(impl) + require.NoError(t, err) + require.Equal(t, []byte("hello world!"), 
content) + + // assert this is the same read seeker (reflect tt.input pointer is the same as the impl pointer + inputImpl, ok := input.(*bytes.Reader) + require.True(t, ok) + assert.Equal(t, reflect.ValueOf(inputImpl).Pointer(), reflect.ValueOf(impl).Pointer()) + }, + }, + { + name: "non-empty reader", + input: bytes.NewBufferString("hello world!"), // does not implement io.Seeker (but does implement io.Reader) + assert: func(input io.Reader, got io.ReadSeeker) { + impl, ok := got.(*bytes.Reader) + require.True(t, ok) + _, err := impl.Seek(0, io.SeekStart) + require.NoError(t, err) + content, err := io.ReadAll(impl) + require.NoError(t, err) + require.Equal(t, []byte("hello world!"), content) + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + got, err := SeekableReader(tt.input) + tt.wantErr(t, err) + if err != nil { + return + } + tt.assert(tt.input, got) + }) + } +} diff --git a/syft/format/spdxjson/decoder.go b/syft/format/spdxjson/decoder.go index f9484dcca..5f38ce7ca 100644 --- a/syft/format/spdxjson/decoder.go +++ b/syft/format/spdxjson/decoder.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/format/common/spdxhelpers" + "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/sbom" ) @@ -22,9 +23,10 @@ func NewFormatDecoder() sbom.FormatDecoder { return decoder{} } -func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { - if reader == nil { - return nil, "", "", fmt.Errorf("no SBOM bytes provided") +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + reader, err := stream.SeekableReader(r) + if err != nil { + return nil, "", "", err } // since spdx lib will always return the latest version of the document, we need to identify the version @@ -54,8 +56,9 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, 
sbom.FormatID, string return s, id, version, nil } -func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { - if reader == nil { +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + reader, err := stream.SeekableReader(r) + if err != nil { return "", "" } @@ -75,8 +78,7 @@ func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { dec := json.NewDecoder(reader) var doc Document - err := dec.Decode(&doc) - if err != nil { + if err = dec.Decode(&doc); err != nil { // maybe not json? maybe not valid? doesn't matter, we won't process it. return "", "" } diff --git a/syft/format/spdxtagvalue/decoder.go b/syft/format/spdxtagvalue/decoder.go index 4f70e382c..7b6a6fe8f 100644 --- a/syft/format/spdxtagvalue/decoder.go +++ b/syft/format/spdxtagvalue/decoder.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/format/common/spdxhelpers" + "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/sbom" ) @@ -22,9 +23,10 @@ func NewFormatDecoder() sbom.FormatDecoder { return decoder{} } -func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { - if reader == nil { - return nil, "", "", fmt.Errorf("no SBOM bytes provided") +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + reader, err := stream.SeekableReader(r) + if err != nil { + return nil, "", "", err } // since spdx lib will always return the latest version of the document, we need to identify the version @@ -54,8 +56,9 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string return s, id, version, nil } -func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { - if reader == nil { +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + reader, err := stream.SeekableReader(r) + if err != nil { return "", "" } diff --git a/syft/format/syftjson/decoder.go 
b/syft/format/syftjson/decoder.go index 22cb86455..4bcb53ee4 100644 --- a/syft/format/syftjson/decoder.go +++ b/syft/format/syftjson/decoder.go @@ -10,6 +10,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/format/internal/stream" "github.com/anchore/syft/syft/format/syftjson/model" "github.com/anchore/syft/syft/sbom" ) @@ -22,9 +23,10 @@ func NewFormatDecoder() sbom.FormatDecoder { return decoder{} } -func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string, error) { - if reader == nil { - return nil, "", "", fmt.Errorf("no SBOM bytes provided") +func (d decoder) Decode(r io.Reader) (*sbom.SBOM, sbom.FormatID, string, error) { + reader, err := stream.SeekableReader(r) + if err != nil { + return nil, "", "", err } id, version := d.Identify(reader) @@ -39,8 +41,7 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string dec := json.NewDecoder(reader) - err := dec.Decode(&doc) - if err != nil { + if err = dec.Decode(&doc); err != nil { return nil, "", "", fmt.Errorf("unable to decode syft-json document: %w", err) } @@ -51,8 +52,9 @@ func (d decoder) Decode(reader io.ReadSeeker) (*sbom.SBOM, sbom.FormatID, string return toSyftModel(doc), ID, doc.Schema.Version, nil } -func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { - if reader == nil { +func (d decoder) Identify(r io.Reader) (sbom.FormatID, string) { + reader, err := stream.SeekableReader(r) + if err != nil { return "", "" } @@ -68,8 +70,7 @@ func (d decoder) Identify(reader io.ReadSeeker) (sbom.FormatID, string) { dec := json.NewDecoder(reader) var doc Document - err := dec.Decode(&doc) - if err != nil { + if err = dec.Decode(&doc); err != nil { // maybe not json? maybe not valid? doesn't matter, we won't process it. 
return "", "" } diff --git a/syft/sbom/format.go b/syft/sbom/format.go index 94901ffce..56652e698 100644 --- a/syft/sbom/format.go +++ b/syft/sbom/format.go @@ -23,9 +23,9 @@ type FormatEncoder interface { type FormatDecoder interface { // Decode will return an SBOM from the given reader. If the bytes are not a valid SBOM for the given format // then an error will be returned. - Decode(io.ReadSeeker) (*SBOM, FormatID, string, error) + Decode(io.Reader) (*SBOM, FormatID, string, error) // Identify will return the format ID and version for the given reader. Note: this does not validate the // full SBOM, only pulls the minimal information necessary to identify the format. - Identify(io.ReadSeeker) (FormatID, string) + Identify(io.Reader) (FormatID, string) }