diff --git a/cmd/syft/internal/commands/update.go b/cmd/syft/internal/commands/update.go index 27629ff49..79e0ac538 100644 --- a/cmd/syft/internal/commands/update.go +++ b/cmd/syft/internal/commands/update.go @@ -107,7 +107,7 @@ func fetchLatestApplicationVersion(id clio.Identification) (*hashiVersion.Versio return nil, fmt.Errorf("HTTP %d on fetching latest version: %s", resp.StatusCode, resp.Status) } - versionBytes, err := io.ReadAll(resp.Body) + versionBytes, err := io.ReadAll(io.LimitReader(resp.Body, 500)) if err != nil { return nil, fmt.Errorf("failed to read latest version: %w", err) } diff --git a/examples/create_custom_sbom/alpine_configuration_cataloger.go b/examples/create_custom_sbom/alpine_configuration_cataloger.go index d43ff22e2..459cc3433 100644 --- a/examples/create_custom_sbom/alpine_configuration_cataloger.go +++ b/examples/create_custom_sbom/alpine_configuration_cataloger.go @@ -94,7 +94,7 @@ func getVersion(resolver file.Resolver) (string, []file.Location, error) { } defer internal.CloseAndLogError(reader, locations[0].RealPath) - version, err := io.ReadAll(reader) + version, err := io.ReadAll(reader) //nolint:gocritic // example code if err != nil { return "", nil, fmt.Errorf("unable to read alpine version: %w", err) } @@ -117,7 +117,7 @@ func getAPKKeys(resolver file.Resolver) (map[string]string, []file.Location, err if err != nil { return nil, nil, fmt.Errorf("unable to resolve file contents by location at %s: %w", location.RealPath, err) } - content, err := io.ReadAll(reader) + content, err := io.ReadAll(reader) //nolint:gocritic // example code if err != nil { return nil, nil, fmt.Errorf("unable to read apk key content at %s: %w", location.RealPath, err) } diff --git a/go.mod b/go.mod index 9b14a5523..5e81d09bb 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,6 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/OneOfOne/xxhash v1.2.8 github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d - github.com/acobaugh/osrelease v0.1.0 github.com/adrg/xdg v0.5.3 github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9 github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716 @@ -270,6 +269,7 @@ require ( ) require ( + github.com/acobaugh/osrelease v0.1.0 github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be github.com/cespare/xxhash/v2 v2.3.0 github.com/gpustack/gguf-parser-go v0.24.0 diff --git a/internal/jsonschema/main.go b/internal/jsonschema/main.go index f5dc06f2c..55b718597 100644 --- a/internal/jsonschema/main.go +++ b/internal/jsonschema/main.go @@ -215,7 +215,7 @@ func write(schema []byte) { panic(err) } - existingSchemaBytes, err := io.ReadAll(existingFh) + existingSchemaBytes, err := io.ReadAll(existingFh) //nolint:gocritic // offline code generator if err != nil { panic(err) } diff --git a/internal/licenses/find_evidence.go b/internal/licenses/find_evidence.go index 1e41036e5..f5f020ca2 100644 --- a/internal/licenses/find_evidence.go +++ b/internal/licenses/find_evidence.go @@ -10,7 +10,7 @@ func (s *scanner) FindEvidence(_ context.Context, reader io.Reader) (evidence [] return nil, nil, nil } - content, err = io.ReadAll(reader) + content, err = io.ReadAll(reader) //nolint:gocritic // license scanner requires full content if err != nil { return nil, nil, err } diff --git a/internal/testutils/golden_files.go b/internal/testutils/golden_files.go index 1051f6525..6a44048a8 100644 --- a/internal/testutils/golden_files.go +++ b/internal/testutils/golden_files.go @@ -31,7 +31,7 @@ func UpdateGoldenFileContents(t *testing.T, contents 
[]byte) { t.Log(dangerText("!!! UPDATING GOLDEN FILE !!!"), goldenFilePath) - err := os.WriteFile(goldenFilePath, contents, 0600) + err := os.WriteFile(goldenFilePath, contents, 0o600) if err != nil { t.Fatalf("could not update golden file (%s): %+v", goldenFilePath, err) } @@ -50,7 +50,9 @@ func GetGoldenFileContents(t *testing.T) []byte { } defer f.Close() - bytes, err := io.ReadAll(f) + // suppress lint prohibiting ReadAll since golden files are source files in Syft's repository + // and not user provided artifacts. + bytes, err := io.ReadAll(f) //nolint:gocritic // golden files are trusted source files, not user artifacts if err != nil { t.Fatalf("could not read file (%s): %+v", goldenPath, err) } diff --git a/internal/tmpdir/tmpdir.go b/internal/tmpdir/tmpdir.go new file mode 100644 index 000000000..d77d33205 --- /dev/null +++ b/internal/tmpdir/tmpdir.go @@ -0,0 +1,121 @@ +package tmpdir + +import ( + "context" + "fmt" + "os" + "sync" + "sync/atomic" +) + +type ctxKey struct{} + +// Root creates a new root temp directory with the given prefix and returns a context with the +// TempDir attached. Callers should defer Cleanup() on the returned TempDir to ensure all +// temp files are removed. +func Root(ctx context.Context, prefix string) (context.Context, *TempDir) { + td := &TempDir{prefix: prefix} + return context.WithValue(ctx, ctxKey{}, td), td +} + +// FromPath creates a TempDir backed by an existing directory. The caller owns the lifecycle of +// the directory and should not call Cleanup(), which would remove it. This is useful for wrapping +// a directory from t.TempDir(), where the test framework handles cleanup automatically. +func FromPath(dir string) *TempDir { + td := &TempDir{} + td.root = dir + td.initOnce.Do(func() {}) // mark as initialized + return td +} + +// WithValue returns a new context with the given TempDir attached. Use this to inject an +// existing TempDir into a context (e.g., sharing a TempDir across multiple test contexts). +func WithValue(ctx context.Context, td *TempDir) context.Context { + return context.WithValue(ctx, ctxKey{}, td) +} + +// FromContext returns the TempDir from the context, or nil if none is set. +func FromContext(ctx context.Context) *TempDir { + td, _ := ctx.Value(ctxKey{}).(*TempDir) + return td +} + +// TempDir manages a tree of temporary directories. All files and child directories live under +// a single root path that can be removed in one shot via Cleanup(). After initialization, the +// struct has no mutable state — NewChild and NewFile delegate uniqueness to os.MkdirTemp and +// os.CreateTemp respectively, so no locking is needed on the hot path. +type TempDir struct { + prefix string + root string // set exactly once by initOnce + initOnce sync.Once + initErr error + cleanupOnce sync.Once + cleaned atomic.Bool +} + +func noop() {} + +// path returns the root directory, lazily creating it on the first call. +func (t *TempDir) path() (string, error) { + t.initOnce.Do(func() { + t.root, t.initErr = os.MkdirTemp("", t.prefix+"-") + }) + if t.initErr != nil { + return "", fmt.Errorf("failed to create root temp dir: %w", t.initErr) + } + if t.cleaned.Load() { + return "", fmt.Errorf("temp dir has been cleaned up") + } + return t.root, nil +} + +// NewChild creates a named subdirectory under this TempDir. The returned cleanup function removes +// the subdirectory and all contents; callers should defer it to reclaim space early. The root +// Cleanup acts as a safety net if the per-child cleanup is missed.
The cleanup function is safe +// to call multiple times and is safe to call after the root has already been cleaned up. +func (t *TempDir) NewChild(name string) (string, func(), error) { + root, err := t.path() + if err != nil { + return "", noop, err + } + dir, err := os.MkdirTemp(root, name+"-") + if err != nil { + return "", noop, fmt.Errorf("failed to create child temp dir: %w", err) + } + cleanup := func() { + _ = os.RemoveAll(dir) + } + return dir, cleanup, nil +} + +// NewFile creates a new temp file under this TempDir with the given name pattern (as in os.CreateTemp). +// The caller is responsible for closing the file. The returned cleanup function removes the file; +// callers should defer it to reclaim space early. The root Cleanup acts as a safety net if the +// per-file cleanup is missed. The cleanup function is safe to call multiple times. +func (t *TempDir) NewFile(pattern string) (*os.File, func(), error) { + root, err := t.path() + if err != nil { + return nil, noop, err + } + f, err := os.CreateTemp(root, pattern) + if err != nil { + return nil, noop, fmt.Errorf("failed to create temp file: %w", err) + } + cleanup := func() { + _ = os.Remove(f.Name()) + } + return f, cleanup, nil +} + +// Cleanup removes the entire root directory and all contents. Safe to call multiple times. +func (t *TempDir) Cleanup() error { + var err error + t.cleanupOnce.Do(func() { + t.cleaned.Store(true) + if t.root == "" { + return + } + err = os.RemoveAll(t.root) + }) + return err +} diff --git a/internal/tmpdir/tmpdir_test.go b/internal/tmpdir/tmpdir_test.go new file mode 100644 index 000000000..99da49d13 --- /dev/null +++ b/internal/tmpdir/tmpdir_test.go @@ -0,0 +1,295 @@ +package tmpdir + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRootAndFromContext(t *testing.T) { + ctx := context.Background() + assert.Nil(t, FromContext(ctx)) + + ctx, td := Root(ctx, "test") + require.NotNil(t, FromContext(ctx)) + assert.Same(t, td, FromContext(ctx)) +} + +func TestWithValue(t *testing.T) { + _, td := Root(context.Background(), "test") + defer td.Cleanup() + + // inject the existing TempDir into a fresh context + ctx := WithValue(context.Background(), td) + assert.Same(t, td, FromContext(ctx)) + + // the injected TempDir is fully functional + f, cleanup, err := FromContext(ctx).NewFile("with-value-*.txt") + require.NoError(t, err) + defer cleanup() + require.NoError(t, f.Close()) +} + +func TestNewChild(t *testing.T) { + ctx, td := Root(context.Background(), "test") + defer td.Cleanup() + _ = ctx + + child1, cleanup1, err := td.NewChild("sub") + require.NoError(t, err) + defer cleanup1() + child2, cleanup2, err := td.NewChild("sub") + require.NoError(t, err) + defer cleanup2() + + // children are distinct + assert.NotEqual(t, child1, child2) + + // both exist and are under the same root + info1, err := os.Stat(child1) + require.NoError(t, err) + assert.True(t, info1.IsDir()) + + info2, err := os.Stat(child2) + require.NoError(t, err) + assert.True(t, info2.IsDir()) + + assert.Equal(t, filepath.Dir(child1), filepath.Dir(child2)) +} + +func TestNewFile(t *testing.T) { + _, td := Root(context.Background(), "test") + defer td.Cleanup() + + f, cleanup, err := td.NewFile("hello-*.txt") + require.NoError(t, err) + defer cleanup() + + _, err = f.WriteString("hello") + require.NoError(t, err) + require.NoError(t, f.Close()) + + content, err := os.ReadFile(f.Name()) + require.NoError(t, err) + 
assert.Equal(t, "hello", string(content)) +} + +func TestCleanup(t *testing.T) { + _, td := Root(context.Background(), "test") + + child, _, err := td.NewChild("sub") + require.NoError(t, err) + + f, _, err := td.NewFile("file-*") + require.NoError(t, err) + fname := f.Name() + f.Close() + + // write a file inside the child dir too + require.NoError(t, os.WriteFile(filepath.Join(child, "inner.txt"), []byte("x"), 0600)) + + // everything exists + _, err = os.Stat(child) + require.NoError(t, err) + _, err = os.Stat(fname) + require.NoError(t, err) + + // cleanup + require.NoError(t, td.Cleanup()) + + // everything is gone + _, err = os.Stat(child) + assert.True(t, os.IsNotExist(err)) + _, err = os.Stat(fname) + assert.True(t, os.IsNotExist(err)) + + // double cleanup is safe + require.NoError(t, td.Cleanup()) +} + +func TestCleanupPreventsNewAllocation(t *testing.T) { + _, td := Root(context.Background(), "test") + require.NoError(t, td.Cleanup()) + + _, _, err := td.NewChild("nope") + assert.Error(t, err) + + _, _, err = td.NewFile("nope-*") + assert.Error(t, err) +} + +func TestEarlyCleanupFile(t *testing.T) { + _, td := Root(context.Background(), "test") + defer td.Cleanup() + + f, cleanup, err := td.NewFile("early-*.txt") + require.NoError(t, err) + + fname := f.Name() + require.NoError(t, f.Close()) + + // file exists before cleanup + _, err = os.Stat(fname) + require.NoError(t, err) + + // early cleanup removes the file + cleanup() + _, err = os.Stat(fname) + assert.True(t, os.IsNotExist(err)) + + // calling cleanup again is safe (idempotent) + cleanup() +} + +func TestEarlyCleanupChild(t *testing.T) { + _, td := Root(context.Background(), "test") + defer td.Cleanup() + + child, cleanup, err := td.NewChild("early") + require.NoError(t, err) + + // child dir exists + _, err = os.Stat(child) + require.NoError(t, err) + + // early cleanup removes it + cleanup() + _, err = os.Stat(child) + assert.True(t, os.IsNotExist(err)) + + // calling cleanup again is safe (idempotent) + cleanup() +} + +func TestEarlyCleanupThenRootCleanup(t *testing.T) { + _, td := Root(context.Background(), "test") + + f, cleanupFile, err := td.NewFile("combo-*.txt") + require.NoError(t, err) + fname := f.Name() + f.Close() + + child, cleanupChild, err := td.NewChild("combo") + require.NoError(t, err) + + // early cleanup both + cleanupFile() + cleanupChild() + + // files are already gone + _, err = os.Stat(fname) + assert.True(t, os.IsNotExist(err)) + _, err = os.Stat(child) + assert.True(t, os.IsNotExist(err)) + + // root cleanup still works (no error on already-removed contents) + require.NoError(t, td.Cleanup()) +} + +func TestConcurrentNewChildAndNewFile(t *testing.T) { + _, td := Root(context.Background(), "test") + defer td.Cleanup() + + const goroutines = 20 + errs := make(chan error, goroutines) + paths := make(chan string, goroutines) + + for i := 0; i < goroutines; i++ { + go func(i int) { + if i%2 == 0 { + child, cleanup, err := td.NewChild("concurrent") + if err != nil { + errs <- err + return + } + defer cleanup() + paths <- child + } else { + f, cleanup, err := td.NewFile("concurrent-*.txt") + if err != nil { + errs <- err + return + } + defer cleanup() + _ = f.Close() + paths <- f.Name() + } + errs <- nil + }(i) + } + + seen := make(map[string]bool) + for i := 0; i < goroutines; i++ { + err := <-errs + require.NoError(t, err) + } + close(paths) + for p := range paths { + assert.False(t, seen[p], "duplicate path: %s", p) + seen[p] = true + } + assert.Len(t, seen, goroutines) +} + +func 
TestConcurrentNewChildDuringCleanup(t *testing.T) { + _, td := Root(context.Background(), "test") + + // trigger root creation + _, cleanup, err := td.NewChild("init") + require.NoError(t, err) + cleanup() + + // cleanup and concurrent NewChild should not panic + done := make(chan struct{}) + go func() { + _ = td.Cleanup() + close(done) + }() + // try creating children concurrently with cleanup — should get errors, not panics + for i := 0; i < 10; i++ { + _, c, _ := td.NewChild("race") + if c != nil { + c() + } + } + <-done +} + +func TestLazyCreation(t *testing.T) { + _, td := Root(context.Background(), "test") + + // root dir is not created until needed + assert.Equal(t, "", td.root) + + _, _, err := td.NewFile("trigger-*") + require.NoError(t, err) + + assert.NotEmpty(t, td.root) + + require.NoError(t, td.Cleanup()) +} + +func TestFromPath(t *testing.T) { + dir := t.TempDir() + td := FromPath(dir) + + // can create children + child, cleanup, err := td.NewChild("sub") + require.NoError(t, err) + defer cleanup() + assert.DirExists(t, child) + + // can create files + f, cleanupFile, err := td.NewFile("file-*.txt") + require.NoError(t, err) + defer cleanupFile() + require.NoError(t, f.Close()) + assert.FileExists(t, f.Name()) + + // root is the provided dir + assert.Equal(t, dir, filepath.Dir(child)) +} diff --git a/syft/create_sbom.go b/syft/create_sbom.go index 126015f2b..817e698c6 100644 --- a/syft/create_sbom.go +++ b/syft/create_sbom.go @@ -12,8 +12,10 @@ import ( "github.com/anchore/go-sync" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/licenses" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/internal/task" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/event/monitor" @@ -24,6 +26,8 @@ import ( // CreateSBOM creates a software bill-of-materials from the given source. If the CreateSBOMConfig is nil, then // default options will be used. +// +//nolint:funlen func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (*sbom.SBOM, error) { if cfg == nil { cfg = DefaultCreateSBOMConfig() @@ -65,12 +69,25 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) ( }, } - // setup everything we need in context: license scanner, executors, etc. + // check if the caller already provided a TempDir; if so, don't clean it up (the caller owns it) + callerOwnsTempDir := tmpdir.FromContext(ctx) != nil + + // setup everything we need in context: license scanner, executors, temp storage, etc. 
ctx, err = setupContext(ctx, cfg) if err != nil { return nil, err } + if !callerOwnsTempDir { + if td := tmpdir.FromContext(ctx); td != nil { + defer func() { + if err := td.Cleanup(); err != nil { + log.Warnf("failed to clean up temp dir: %v", err) + } + }() + } + } + catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups) packageCatalogingProgress := monitorPackageCatalogingTask() @@ -95,6 +112,11 @@ func setupContext(ctx context.Context, cfg *CreateSBOMConfig) (context.Context, // configure parallel executors ctx = setContextExecutors(ctx, cfg) + // configure temp dir factory for catalogers (if not already set) + if tmpdir.FromContext(ctx) == nil { + ctx, _ = tmpdir.Root(ctx, "syft-cataloger") + } + // configure license scanner // skip injecting a license scanner if one already set on context if licenses.IsContextLicenseScannerSet(ctx) { diff --git a/syft/format/internal/stream/seekable_reader.go b/syft/format/internal/stream/seekable_reader.go index a4dd9e060..5df51a881 100644 --- a/syft/format/internal/stream/seekable_reader.go +++ b/syft/format/internal/stream/seekable_reader.go @@ -20,7 +20,7 @@ func SeekableReader(reader io.Reader) (io.ReadSeeker, error) { return getOffsetReadSeeker(r) } - content, err := io.ReadAll(reader) + content, err := io.ReadAll(reader) //nolint:gocritic // buffering non-seekable to seekable reader if err != nil { return nil, err } diff --git a/syft/internal/unionreader/union_reader.go b/syft/internal/unionreader/union_reader.go index 96bf558b2..8c3a09cc7 100644 --- a/syft/internal/unionreader/union_reader.go +++ b/syft/internal/unionreader/union_reader.go @@ -62,7 +62,7 @@ func GetUnionReader(readerCloser io.ReadCloser) (UnionReader, error) { return newReaderAtAdapter(r), nil } - b, err := io.ReadAll(readerCloser) + b, err := io.ReadAll(readerCloser) //nolint:gocritic // buffering non-seekable to ReaderAt if err != nil { return nil, fmt.Errorf("unable to read contents from binary: %w", err) } diff --git a/syft/linux/identify_release.go b/syft/linux/identify_release.go index 0ff7ab280..334d1a890 100644 --- a/syft/linux/identify_release.go +++ b/syft/linux/identify_release.go @@ -92,7 +92,7 @@ func tryParseReleaseInfo(resolver file.Resolver, location file.Location, logger } defer internal.CloseAndLogError(contentReader, location.AccessPath) - content, err := io.ReadAll(contentReader) + content, err := io.ReadAll(io.LimitReader(contentReader, 5*1024*1024)) if err != nil { logger.WithFields("error", err, "path", location.RealPath).Trace("unable to read contents") return nil diff --git a/syft/linux/supplement_release.go b/syft/linux/supplement_release.go index 184da9379..dfa2c9b28 100644 --- a/syft/linux/supplement_release.go +++ b/syft/linux/supplement_release.go @@ -38,7 +38,7 @@ func readDebianVersionFile(resolver file.Resolver, location file.Location) strin return "" } defer internal.CloseAndLogError(rdr, location.RealPath) - contents, err := io.ReadAll(rdr) + contents, err := io.ReadAll(io.LimitReader(rdr, 5*1024*1024)) if err != nil { log.Debugf("error reading %s: %v", location.RealPath, err) return "" diff --git a/syft/pkg/cataloger/ai/parse_gguf_model.go b/syft/pkg/cataloger/ai/parse_gguf_model.go index 3fe78f658..f59fddfd5 100644 --- a/syft/pkg/cataloger/ai/parse_gguf_model.go +++ b/syft/pkg/cataloger/ai/parse_gguf_model.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "io" - "os" "path/filepath" "sort" "strings" @@ -15,6 +14,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" + 
"github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/internal/unknown" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -24,17 +24,19 @@ import ( // parseGGUFModel parses a GGUF model file and returns the discovered package. // This implementation only reads the header portion of the file, not the entire model. -func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func parseGGUFModel(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { defer internal.CloseAndLogError(reader, reader.Path()) - // Create a temporary file for the library to parse - // The library requires a file path, so we create a temp file - tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf") + td := tmpdir.FromContext(ctx) + if td == nil { + return nil, nil, fmt.Errorf("no temp dir factory in context") + } + tempFile, cleanup, err := td.NewFile("syft-gguf-*.gguf") if err != nil { return nil, nil, fmt.Errorf("failed to create temp file: %w", err) } + defer cleanup() tempPath := tempFile.Name() - defer os.Remove(tempPath) // Copy and validate the GGUF file header using LimitedReader to prevent OOM // We use LimitedReader to cap reads at maxHeaderSize (50MB) diff --git a/syft/pkg/cataloger/alpine/parse_apk_db.go b/syft/pkg/cataloger/alpine/parse_apk_db.go index 548b9df11..094d076f6 100644 --- a/syft/pkg/cataloger/alpine/parse_apk_db.go +++ b/syft/pkg/cataloger/alpine/parse_apk_db.go @@ -171,7 +171,7 @@ func findReleases(resolver file.Resolver, dbPath string) []linux.Release { func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release { var releases []linux.Release - reposB, err := io.ReadAll(reader) + reposB, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer if err != nil { log.Tracef("unable to read APK repositories file %q: %+v", reader.RealPath, err) return nil diff --git a/syft/pkg/cataloger/erlang/erlang_parser.go b/syft/pkg/cataloger/erlang/erlang_parser.go index 60149b631..83aeb8f01 100644 --- a/syft/pkg/cataloger/erlang/erlang_parser.go +++ b/syft/pkg/cataloger/erlang/erlang_parser.go @@ -47,7 +47,7 @@ func node(value interface{}) erlangNode { // parseErlang basic parser for erlang, used by rebar.lock func parseErlang(reader io.Reader) (erlangNode, error) { - data, err := io.ReadAll(reader) + data, err := io.ReadAll(reader) //nolint:gocritic // custom parser requires []byte if err != nil { return node(nil), err } diff --git a/syft/pkg/cataloger/githubactions/parse_composite_action.go b/syft/pkg/cataloger/githubactions/parse_composite_action.go index 082d70fb7..96378815e 100644 --- a/syft/pkg/cataloger/githubactions/parse_composite_action.go +++ b/syft/pkg/cataloger/githubactions/parse_composite_action.go @@ -3,7 +3,6 @@ package githubactions import ( "context" "fmt" - "io" "go.yaml.in/yaml/v3" @@ -25,13 +24,9 @@ type compositeActionRunsDef struct { } func parseCompositeActionForActionUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - contents, errs := io.ReadAll(reader) - if errs != nil { - return nil, nil, fmt.Errorf("unable to read yaml composite action file: %w", errs) - } - var ca compositeActionDef - if errs = yaml.Unmarshal(contents, &ca); errs != nil { + var errs error + if errs = 
yaml.NewDecoder(reader).Decode(&ca); errs != nil { return nil, nil, fmt.Errorf("unable to parse yaml composite action file: %w", errs) } diff --git a/syft/pkg/cataloger/githubactions/parse_workflow.go b/syft/pkg/cataloger/githubactions/parse_workflow.go index c4f27c549..f784bce63 100644 --- a/syft/pkg/cataloger/githubactions/parse_workflow.go +++ b/syft/pkg/cataloger/githubactions/parse_workflow.go @@ -3,7 +3,6 @@ package githubactions import ( "context" "fmt" - "io" "regexp" "go.yaml.in/yaml/v3" @@ -41,14 +40,10 @@ type stepDef struct { } func parseWorkflowForWorkflowUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - contents, errs := io.ReadAll(reader) - if errs != nil { - return nil, nil, fmt.Errorf("unable to read yaml workflow file: %w", errs) - } - // parse the yaml file into a generic node to preserve comments var node yaml.Node - if errs = yaml.Unmarshal(contents, &node); errs != nil { + var errs error + if errs = yaml.NewDecoder(reader).Decode(&node); errs != nil { return nil, nil, fmt.Errorf("unable to parse yaml workflow file: %w", errs) } @@ -79,14 +74,10 @@ func parseWorkflowForWorkflowUsage(_ context.Context, _ file.Resolver, _ *generi } func parseWorkflowForActionUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - contents, errs := io.ReadAll(reader) - if errs != nil { - return nil, nil, fmt.Errorf("unable to read yaml workflow file: %w", errs) - } - // parse the yaml file into a generic node to preserve comments var node yaml.Node - if errs = yaml.Unmarshal(contents, &node); errs != nil { + var errs error + if errs = yaml.NewDecoder(reader).Decode(&node); errs != nil { return nil, nil, fmt.Errorf("unable to parse yaml workflow file: %w", errs) } diff --git a/syft/pkg/cataloger/golang/licenses.go b/syft/pkg/cataloger/golang/licenses.go index 80c4cfe10..cb5a67c0d 100644 --- a/syft/pkg/cataloger/golang/licenses.go +++ b/syft/pkg/cataloger/golang/licenses.go @@ -23,6 +23,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/cache" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/licenses" @@ -163,7 +164,10 @@ func (c *goLicenseResolver) getLicensesFromRemote(ctx context.Context, moduleNam return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]pkg.License, error) { proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName) - urlPrefix, fsys, err := getModule(proxies, moduleName, moduleVersion) + urlPrefix, fsys, cleanup, err := getModule(ctx, proxies, moduleName, moduleVersion) + if cleanup != nil { + defer cleanup() + } if err != nil { return nil, err } @@ -258,7 +262,7 @@ func processCaps(s string) string { }) } -func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, err error) { +func getModule(ctx context.Context, proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, cleanup func(), err error) { for _, proxy := range proxies { u, _ := url.Parse(proxy) if proxy == "direct" { @@ -267,7 +271,7 @@ func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix st } switch u.Scheme { case "https", "http": - urlPrefix, fsys, err = getModuleProxy(proxy, moduleName, 
moduleVersion) + urlPrefix, fsys, cleanup, err = getModuleProxy(ctx, proxy, moduleName, moduleVersion) case "file": p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) urlPrefix = path.Join("file://", p) + "/" @@ -281,42 +285,78 @@ func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix st return } -func getModuleProxy(proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, _ error) { +// getModuleProxy downloads a Go module zip from the given proxy and returns a filesystem view of its contents. +// The returned cleanup function closes the underlying temp file and removes it; callers must not use the +// returned fs.FS after calling cleanup. +func getModuleProxy(ctx context.Context, proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, cleanup func(), _ error) { u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) // get the module zip log.WithFields("url", u).Info("downloading go module from proxy") resp, err := http.Get(u) //nolint:gosec if err != nil { - return "", nil, err + return "", nil, nil, err } defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { + // close the first response before retrying with a lowercased module name + _ = resp.Body.Close() + u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) // try lowercasing it; some packages have mixed casing that really messes up the proxy resp, err = http.Get(u) //nolint:gosec if err != nil { - return "", nil, err + return "", nil, nil, err } defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { - return "", nil, fmt.Errorf("failed to get module zip: %s", resp.Status) + return "", nil, nil, fmt.Errorf("failed to get module zip: %s", resp.Status) } } - // read the zip - b, err := io.ReadAll(resp.Body) + // stream the zip to a temp file to avoid unbounded memory usage + td := tmpdir.FromContext(ctx) + if td == nil { + return "", nil, nil, fmt.Errorf("no temp dir factory in context") + } + tmpFile, cleanupFile, err := td.NewFile("gomodule-*.zip") //nolint:gocritic // cleanup is returned to caller, not deferred here if err != nil { - return "", nil, err + return "", nil, nil, fmt.Errorf("failed to create temp file for module zip: %w", err) } - out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) + cleanup = func() { + _ = tmpFile.Close() + cleanupFile() + } + + // cap downloads at 500MB to prevent disk exhaustion from malicious proxies + const maxModuleZipSize = 500 * 1024 * 1024 + size, err := io.Copy(tmpFile, io.LimitReader(resp.Body, maxModuleZipSize)) + if err != nil { + cleanup() + return "", nil, nil, fmt.Errorf("failed to download module zip: %w", err) + } + if size >= maxModuleZipSize { + cleanup() + return "", nil, nil, fmt.Errorf("module zip exceeds %d byte size limit", maxModuleZipSize) + } + + if _, err := tmpFile.Seek(0, io.SeekStart); err != nil { + cleanup() + return "", nil, nil, fmt.Errorf("failed to seek module zip: %w", err) + } + + out, err = zip.NewReader(tmpFile, size) + if err != nil { + cleanup() + return "", nil, nil, fmt.Errorf("failed to read module zip: %w", err) + } versionPath := findVersionPath(out, ".") out = getSubFS(out, versionPath) - return u + "#" + versionPath + "/", out, err + return u + "#" + versionPath + "/", out, cleanup, err } func findVersionPath(f fs.FS, dir string) string { diff --git a/syft/pkg/cataloger/golang/licenses_test.go b/syft/pkg/cataloger/golang/licenses_test.go index 
27d079ce6..0674b0449 100644 --- a/syft/pkg/cataloger/golang/licenses_test.go +++ b/syft/pkg/cataloger/golang/licenses_test.go @@ -24,7 +24,7 @@ import ( ) func Test_LicenseSearch(t *testing.T) { - ctx := pkgtest.Context() + ctx := pkgtest.Context(t) loc1 := file.NewLocation("github.com/someorg/somename@v0.3.2/LICENSE") loc2 := file.NewLocation("github.com/!cap!o!r!g/!cap!project@v4.111.5/LICENSE.txt") @@ -321,7 +321,7 @@ func Test_noLocalGoModDir(t *testing.T) { validTmp := t.TempDir() require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir)) - ctx := pkgtest.Context() + ctx := pkgtest.Context(t) tests := []struct { name string dir string diff --git a/syft/pkg/cataloger/golang/parse_go_mod.go b/syft/pkg/cataloger/golang/parse_go_mod.go index c3e6bcba0..d593ed299 100644 --- a/syft/pkg/cataloger/golang/parse_go_mod.go +++ b/syft/pkg/cataloger/golang/parse_go_mod.go @@ -324,7 +324,7 @@ func buildModuleRelationships( } func (c *goModCataloger) parseModFileContents(reader file.LocationReadCloser) (*modfile.File, error) { - contents, err := io.ReadAll(reader) + contents, err := io.ReadAll(reader) //nolint:gocritic // modfile.Parse requires []byte if err != nil { return nil, fmt.Errorf("failed to read go module: %w", err) } diff --git a/syft/pkg/cataloger/haskell/parse_stack_lock.go b/syft/pkg/cataloger/haskell/parse_stack_lock.go index bc1de89e3..90c25d02e 100644 --- a/syft/pkg/cataloger/haskell/parse_stack_lock.go +++ b/syft/pkg/cataloger/haskell/parse_stack_lock.go @@ -3,7 +3,6 @@ package haskell import ( "context" "fmt" - "io" "strings" "go.yaml.in/yaml/v3" @@ -42,16 +41,11 @@ type completedSnapshot struct { // parseStackLock is a parser function for stack.yaml.lock contents, returning all packages discovered. func parseStackLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - bytes, err := io.ReadAll(reader) - if err != nil { - return nil, nil, fmt.Errorf("failed to load stack.yaml.lock file: %w", err) - } - var lockFile stackLock - if err := yaml.Unmarshal(bytes, &lockFile); err != nil { + if err := yaml.NewDecoder(reader).Decode(&lockFile); err != nil { log.WithFields("error", err, "path", reader.RealPath).Trace("failed to parse stack.yaml.lock") - return nil, nil, fmt.Errorf("failed to parse stack.yaml.lock file") + return nil, nil, fmt.Errorf("failed to parse stack.yaml.lock file: %w", err) } var ( diff --git a/syft/pkg/cataloger/haskell/parse_stack_yaml.go b/syft/pkg/cataloger/haskell/parse_stack_yaml.go index 32aa129b5..cd0fc4167 100644 --- a/syft/pkg/cataloger/haskell/parse_stack_yaml.go +++ b/syft/pkg/cataloger/haskell/parse_stack_yaml.go @@ -3,7 +3,6 @@ package haskell import ( "context" "fmt" - "io" "go.yaml.in/yaml/v3" @@ -23,16 +22,11 @@ type stackYaml struct { // parseStackYaml is a parser function for stack.yaml contents, returning all packages discovered. 
func parseStackYaml(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - bytes, err := io.ReadAll(reader) - if err != nil { - return nil, nil, fmt.Errorf("failed to load stack.yaml file: %w", err) - } - var stackFile stackYaml - if err := yaml.Unmarshal(bytes, &stackFile); err != nil { + if err := yaml.NewDecoder(reader).Decode(&stackFile); err != nil { log.WithFields("error", err, "path", reader.RealPath).Trace("failed to parse stack.yaml") - return nil, nil, fmt.Errorf("failed to parse stack.yaml file") + return nil, nil, fmt.Errorf("failed to parse stack.yaml file: %w", err) } var pkgs []pkg.Package diff --git a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go index 904639f6b..0c8084efe 100644 --- a/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go +++ b/syft/pkg/cataloger/internal/cpegenerate/dictionary/index-generator/nvd_api_client.go @@ -270,7 +270,7 @@ func (c *NVDAPIClient) fetchPage(ctx context.Context, startIndex int, start, end // handleRateLimit handles HTTP 429 responses by parsing Retry-After and waiting func (c *NVDAPIClient) handleRateLimit(ctx context.Context, httpResp *http.Response, attempt int) error { - body, _ := io.ReadAll(httpResp.Body) + body, _ := io.ReadAll(httpResp.Body) //nolint:gocritic // offline code generator httpResp.Body.Close() // parse Retry-After header @@ -298,7 +298,7 @@ func checkHTTPStatus(httpResp *http.Response) error { if httpResp.StatusCode == http.StatusOK { return nil } - body, _ := io.ReadAll(httpResp.Body) + body, _ := io.ReadAll(httpResp.Body) //nolint:gocritic // offline code generator httpResp.Body.Close() return fmt.Errorf("NVD API error (status %d): %s", httpResp.StatusCode, string(body)) } diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go index 137a315dc..c66f6a1d2 100644 --- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go +++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go @@ -24,6 +24,7 @@ import ( "github.com/anchore/syft/internal/cmptest" "github.com/anchore/syft/internal/licenses" "github.com/anchore/syft/internal/relationship" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/internal/fileresolver" @@ -62,7 +63,7 @@ type CatalogTester struct { skipTestObservations bool } -func Context() context.Context { +func initLicenseScanner() { once.Do(func() { // most of the time in testing is initializing the scanner. Let's do that just once sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75} @@ -72,13 +73,20 @@ func Context() context.Context { } licenseScanner = &scanner }) +} - return licenses.SetContextLicenseScanner(context.Background(), *licenseScanner) +// Context returns a context with a shared license scanner and a TempDir backed by t.TempDir(), +// so cleanup is handled automatically when the test finishes. 
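+// A minimal usage sketch (all names below come from this package and internal/tmpdir, as shown in this change): +// +//	ctx := Context(t) +//	td := tmpdir.FromContext(ctx) +//	f, cleanup, err := td.NewFile("scratch-*.tmp") +//	require.NoError(t, err) +//	defer cleanup() +//	require.NoError(t, f.Close())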
+func Context(t *testing.T) context.Context { + t.Helper() + initLicenseScanner() + td := tmpdir.FromPath(t.TempDir()) + ctx := tmpdir.WithValue(context.Background(), td) + return licenses.SetContextLicenseScanner(ctx, *licenseScanner) } func NewCatalogTester() *CatalogTester { return &CatalogTester{ - context: Context(), locationComparer: cmptest.DefaultLocationComparer, licenseComparer: cmptest.DefaultLicenseComparer, packageStringer: stringPackage, @@ -272,9 +280,18 @@ func (p *CatalogTester) WithoutTestObserver() *CatalogTester { return p } +func (p *CatalogTester) ensureContext(t *testing.T) context.Context { + t.Helper() + if p.context != nil { + return p.context + } + return Context(t) +} + func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) { t.Helper() - pkgs, relationships, err := parser(p.context, p.resolver, p.env, p.reader) + ctx := p.ensureContext(t) + pkgs, relationships, err := parser(ctx, p.resolver, p.env, p.reader) // only test for errors if explicitly requested if p.wantErr != nil { @@ -289,10 +306,11 @@ func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) { func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) { t.Helper() + ctx := p.ensureContext(t) resolver := NewObservingResolver(p.resolver) - pkgs, relationships, err := cataloger.Catalog(p.context, resolver) + pkgs, relationships, err := cataloger.Catalog(ctx, resolver) // this is a minimum set, the resolver may return more that just this list for _, path := range p.expectedPathResponses { diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 7cefa9f47..0195247a9 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -18,6 +18,7 @@ import ( "github.com/anchore/syft/internal" intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/internal/unknown" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -104,7 +105,11 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d virtualElements := strings.Split(reader.Path(), ":") currentFilepath := virtualElements[len(virtualElements)-1] - contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(currentFilepath, reader) + td := tmpdir.FromContext(ctx) + if td == nil { + return nil, func() {}, fmt.Errorf("no temp dir factory in context") + } + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, currentFilepath, reader) if err != nil { return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index d30de4625..6afa74528 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -2,7 +2,6 @@ package java import ( "bufio" - "context" "fmt" "io" "os" @@ -30,7 +29,7 @@ import ( func TestSearchMavenForLicenses(t *testing.T) { url := maventest.MockRepo(t, "internal/maven/testdata/maven-repo") - ctx := pkgtest.Context() + ctx := pkgtest.Context(t) tests := []struct { name string @@ -72,7 +71,7 @@ func TestSearchMavenForLicenses(t *testing.T) { require.NoError(t, err) // setup parser - ap, cleanupFn, err := newJavaArchiveParser(context.Background(), + ap, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t), file.LocationReadCloser{ Location: 
file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -81,17 +80,17 @@ func TestSearchMavenForLicenses(t *testing.T) { require.NoError(t, err) // assert licenses are discovered from upstream - _, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(context.Background()) + _, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(pkgtest.Context(t)) require.NotNil(t, parsedPom, "expected to find pom information in the fixture") require.NotNil(t, parsedPom.project, "expected parsedPom to have a project") - resolvedLicenses, _ := ap.maven.ResolveLicenses(context.Background(), parsedPom.project) + resolvedLicenses, _ := ap.maven.ResolveLicenses(pkgtest.Context(t), parsedPom.project) assert.Equal(t, tc.expectedLicenses, toPkgLicenses(ctx, nil, resolvedLicenses)) }) } } func TestParseJar(t *testing.T) { - ctx := pkgtest.Context() + ctx := pkgtest.Context(t) tests := []struct { name string fixture string @@ -372,7 +371,7 @@ func TestParseJar(t *testing.T) { UseNetwork: false, UseMavenLocalRepository: false, } - parser, cleanupFn, err := newJavaArchiveParser(context.Background(), + parser, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -662,7 +661,7 @@ func TestParseNestedJar(t *testing.T) { require.NoError(t, err) gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{}) - actual, _, err := gap.processJavaArchive(context.Background(), file.LocationReadCloser{ + actual, _, err := gap.processJavaArchive(pkgtest.Context(t), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, }, nil) @@ -1080,7 +1079,7 @@ func Test_newPackageFromMavenData(t *testing.T) { test.expectedParent.Locations = locations r := maven.NewResolver(nil, maven.DefaultConfig()) - actualPackage := newPackageFromMavenData(context.Background(), r, test.props, test.project, test.parent, file.NewLocation(virtualPath)) + actualPackage := newPackageFromMavenData(pkgtest.Context(t), r, test.props, test.project, test.parent, file.NewLocation(virtualPath)) if test.expectedPackage == nil { require.Nil(t, actualPackage) } else { @@ -1120,7 +1119,7 @@ func Test_artifactIDMatchesFilename(t *testing.T) { } func Test_parseJavaArchive_regressions(t *testing.T) { - ctx := context.TODO() + ctx := pkgtest.Context(t) apiAll := pkg.Package{ Name: "api-all", Version: "2.0.0", @@ -1499,7 +1498,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { fixture, err := os.Open(fixturePath) require.NoError(t, err) - parser, cleanupFn, err := newJavaArchiveParser(context.Background(), + parser, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t), file.LocationReadCloser{ Location: file.NewLocation(fixture.Name()), ReadCloser: fixture, @@ -1507,7 +1506,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { defer cleanupFn() require.NoError(t, err) - groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(context.TODO()) + groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(pkgtest.Context(t)) require.Equal(t, test.expected, maven.NewID(groupID, artifactID, version)) }() } @@ -1634,9 +1633,9 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) { fixture, err := os.Open(jarName) require.NoError(t, err) - ctx := context.TODO() + ctx := pkgtest.Context(t) // setup parser - ap, cleanupFn, err := newJavaArchiveParser(context.Background(), + ap, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t), file.LocationReadCloser{ Location: 
file.NewLocation(fixture.Name()), ReadCloser: fixture, diff --git a/syft/pkg/cataloger/java/internal/maven/pom_parser.go b/syft/pkg/cataloger/java/internal/maven/pom_parser.go index ed0637e1c..155d0199c 100644 --- a/syft/pkg/cataloger/java/internal/maven/pom_parser.go +++ b/syft/pkg/cataloger/java/internal/maven/pom_parser.go @@ -40,7 +40,7 @@ func ParsePomXML(content io.Reader) (project *Project, err error) { } func getUtf8Reader(content io.Reader) (io.Reader, error) { - pomContents, err := io.ReadAll(content) + pomContents, err := io.ReadAll(content) //nolint:gocritic // charset detection requires full buffer if err != nil { return nil, err } diff --git a/syft/pkg/cataloger/java/internal/maven/resolver.go b/syft/pkg/cataloger/java/internal/maven/resolver.go index 907fef314..56a92db20 100644 --- a/syft/pkg/cataloger/java/internal/maven/resolver.go +++ b/syft/pkg/cataloger/java/internal/maven/resolver.go @@ -463,7 +463,7 @@ func (r *Resolver) cacheResolveReader(key string, resolve func() (io.ReadCloser, defer internal.CloseAndLogError(contentReader, key) // store the contents to return a new reader with the same content - contents, err := io.ReadAll(contentReader) + contents, err := io.ReadAll(contentReader) //nolint:gocritic // caching requires full buffer if err != nil { return nil, err } diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go index 8d7c81b26..8a622b086 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml_test.go +++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go @@ -1,7 +1,6 @@ package java import ( - "context" "os" "testing" @@ -194,7 +193,7 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) { func Test_parsePomXMLProject(t *testing.T) { // TODO: ideally we would have the path to the contained pom.xml, not the jar jarLocation := file.NewLocation("path/to/archive.jar") - ctx := context.TODO() + ctx := pkgtest.Context(t) tests := []struct { name string project *pkg.JavaPomProject @@ -265,11 +264,11 @@ func Test_parsePomXMLProject(t *testing.T) { pom, err := maven.ParsePomXML(fixture) require.NoError(t, err) - actual := newPomProject(context.Background(), r, fixture.Name(), pom) + actual := newPomProject(pkgtest.Context(t), r, fixture.Name(), pom) assert.NoError(t, err) assert.Equal(t, test.project, actual) - licenses, err := r.ResolveLicenses(context.Background(), pom) + licenses, err := r.ResolveLicenses(pkgtest.Context(t), pom) //assert.NoError(t, err) assert.Equal(t, test.licenses, toPkgLicenses(ctx, &jarLocation, licenses)) }) @@ -331,7 +330,7 @@ func Test_pomParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { r := maven.NewResolver(nil, maven.DefaultConfig()) - assert.Equal(t, test.expected, pomParent(context.Background(), r, &maven.Project{Parent: test.input})) + assert.Equal(t, test.expected, pomParent(pkgtest.Context(t), r, &maven.Project{Parent: test.input})) }) } } @@ -433,7 +432,7 @@ func Test_resolveLicenses(t *testing.T) { fr, err := ds.FileResolver(source.AllLayersScope) require.NoError(t, err) - ctx := context.TODO() + ctx := pkgtest.Context(t) pkgs, _, err := cat.Catalog(ctx, fr) require.NoError(t, err) diff --git a/syft/pkg/cataloger/java/save_archive_to_tmp.go b/syft/pkg/cataloger/java/save_archive_to_tmp.go index c602170a7..c77188239 100644 --- a/syft/pkg/cataloger/java/save_archive_to_tmp.go +++ b/syft/pkg/cataloger/java/save_archive_to_tmp.go @@ -6,23 +6,16 @@ import ( "os" "path/filepath" - "github.com/anchore/syft/internal/log" + 
"github.com/anchore/syft/internal/tmpdir" ) -func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, string, func(), error) { +func saveArchiveToTmp(td *tmpdir.TempDir, archiveVirtualPath string, reader io.Reader) (string, string, func(), error) { name := filepath.Base(archiveVirtualPath) - tempDir, err := os.MkdirTemp("", "syft-archive-contents-") + tempDir, cleanupFn, err := td.NewChild("archive-contents") //nolint:gocritic // cleanup is returned to caller, not deferred here if err != nil { return "", "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err) } - cleanupFn := func() { - err = os.RemoveAll(tempDir) - if err != nil { - log.Errorf("unable to cleanup archive tempdir: %+v", err) - } - } - archivePath := filepath.Join(tempDir, "archive-"+name) contentDir := filepath.Join(tempDir, "contents") @@ -37,6 +30,9 @@ func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, stri } defer archiveFile.Close() + // note: no size limit here — the reader comes from a file already enumerated by the + // resolver, not from an untrusted network source. The file size is bounded by the + // source image/directory being scanned. _, err = io.Copy(archiveFile, reader) if err != nil { return contentDir, archivePath, cleanupFn, fmt.Errorf("unable to copy archive: %w", err) diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go index 4c4edc595..8dcd4d454 100644 --- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go @@ -5,6 +5,7 @@ import ( "fmt" intFile "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -58,7 +59,11 @@ func newGenericTarWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericTa } func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader) + td := tmpdir.FromContext(ctx) + if td == nil { + return nil, nil, fmt.Errorf("no temp dir factory in context") + } + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, reader.Path(), reader) // note: even on error, we should always run cleanup functions defer cleanupFn() if err != nil { diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go index 83f0fa3d6..8bc3a9b53 100644 --- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go @@ -1,7 +1,6 @@ package java import ( - "context" "os" "path" "testing" @@ -50,7 +49,7 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) { } gtp := newGenericTarWrappedJavaArchiveParser(ArchiveCatalogerConfig{}) - actualPkgs, _, err := gtp.parseTarWrappedJavaArchive(context.Background(), nil, nil, file.LocationReadCloser{ + actualPkgs, _, err := gtp.parseTarWrappedJavaArchive(pkgtest.Context(t), nil, nil, file.LocationReadCloser{ Location: file.NewLocation(test.fixture), ReadCloser: fixture, }) diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go index e515f4f90..e5a7b6aec 100644 --- 
a/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go +++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser.go @@ -5,6 +5,7 @@ import ( "fmt" intFile "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -30,7 +31,11 @@ func newGenericZipWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericZi } func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader) + td := tmpdir.FromContext(ctx) + if td == nil { + return nil, nil, fmt.Errorf("no temp dir factory in context") + } + contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, reader.Path(), reader) // note: even on error, we should always run cleanup functions defer cleanupFn() if err != nil { diff --git a/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go b/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go index 4e6ae5ba3..1366fd917 100644 --- a/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go +++ b/syft/pkg/cataloger/java/zip_wrapped_archive_parser_test.go @@ -1,7 +1,6 @@ package java import ( - "context" "os" "path" "testing" @@ -10,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" ) func Test_parseZipWrappedJavaArchive(t *testing.T) { @@ -36,7 +36,7 @@ func Test_parseZipWrappedJavaArchive(t *testing.T) { gzp := newGenericZipWrappedJavaArchiveParser(ArchiveCatalogerConfig{}) - actualPkgs, _, err := gzp.parseZipWrappedJavaArchive(context.Background(), nil, nil, file.LocationReadCloser{ + actualPkgs, _, err := gzp.parseZipWrappedJavaArchive(pkgtest.Context(t), nil, nil, file.LocationReadCloser{ Location: file.NewLocation(test.fixture), ReadCloser: fixture, }) diff --git a/syft/pkg/cataloger/javascript/package.go b/syft/pkg/cataloger/javascript/package.go index 0fb79e5c9..54440a190 100644 --- a/syft/pkg/cataloger/javascript/package.go +++ b/syft/pkg/cataloger/javascript/package.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "io" "net/http" "net/url" "path" @@ -257,19 +256,12 @@ func getLicenseFromNpmRegistry(baseURL, packageName, version string) (string, er } }() - bytes, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("unable to parse package from npm registry: %w", err) - } - - dec := json.NewDecoder(strings.NewReader(string(bytes))) - // Read "license" from the response var license struct { License string `json:"license"` } - if err := dec.Decode(&license); err != nil { + if err := json.NewDecoder(resp.Body).Decode(&license); err != nil { return "", fmt.Errorf("unable to parse license from npm registry: %w", err) } @@ -324,14 +316,8 @@ func parseLicensesFromLocation(l file.Location, resolver file.Resolver, pkgFile } defer internal.CloseAndLogError(contentReader, l.RealPath) - contents, err := io.ReadAll(contentReader) - if err != nil { - log.Debugf("error reading file contents for %s: %v", pkgFile, err) - return nil, err - } - var pkgJSON packageJSON - err = json.Unmarshal(contents, &pkgJSON) + err = json.NewDecoder(contentReader).Decode(&pkgJSON) if err != nil { log.Debugf("error parsing %s: %v", pkgFile, err) return nil, err diff --git 
a/syft/pkg/cataloger/javascript/parse_pnpm_lock.go b/syft/pkg/cataloger/javascript/parse_pnpm_lock.go index 6ab32cae5..29001b9a6 100644 --- a/syft/pkg/cataloger/javascript/parse_pnpm_lock.go +++ b/syft/pkg/cataloger/javascript/parse_pnpm_lock.go @@ -179,7 +179,7 @@ func newPnpmLockfileParser(version float64) pnpmLockfileParser { // parsePnpmLock is the main parser function for pnpm-lock.yaml files. func (a genericPnpmLockAdapter) parsePnpmLock(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - data, err := io.ReadAll(reader) + data, err := io.ReadAll(reader) //nolint:gocritic // multi-pass parse requires []byte if err != nil { return nil, nil, fmt.Errorf("failed to load pnpm-lock.yaml file: %w", err) } diff --git a/syft/pkg/cataloger/javascript/parse_yarn_lock.go b/syft/pkg/cataloger/javascript/parse_yarn_lock.go index 7132d1819..af45f1407 100644 --- a/syft/pkg/cataloger/javascript/parse_yarn_lock.go +++ b/syft/pkg/cataloger/javascript/parse_yarn_lock.go @@ -166,7 +166,8 @@ func findDevOnlyPkgs(yarnPkgs []yarnPackage, prodDeps, devDeps map[string]string } func parseYarnV1LockFile(reader io.ReadCloser) ([]yarnPackage, error) { - content, err := io.ReadAll(reader) + // TODO: refactor to use bufio.Scanner for streaming line-by-line parsing instead of reading the entire file + content, err := io.ReadAll(reader) //nolint:gocritic // stateful multi-line parser; candidate for streaming refactor if err != nil { return nil, fmt.Errorf("failed to read yarn.lock file: %w", err) } @@ -265,7 +266,8 @@ func (a genericYarnLockAdapter) parseYarnLock(ctx context.Context, resolver file return nil, nil, nil } - data, err := io.ReadAll(reader) + // TODO: refactor to detect version from the first line via bufio.Scanner, then dispatch to a streaming parser + data, err := io.ReadAll(reader) //nolint:gocritic // two-pass parse: version detection then format-specific parsing if err != nil { return nil, nil, fmt.Errorf("failed to load yarn.lock file: %w", err) } diff --git a/syft/pkg/cataloger/lua/rockspec_parser.go b/syft/pkg/cataloger/lua/rockspec_parser.go index 19f251aea..067ce4c44 100644 --- a/syft/pkg/cataloger/lua/rockspec_parser.go +++ b/syft/pkg/cataloger/lua/rockspec_parser.go @@ -39,7 +39,7 @@ var noReturn = rockspec{ // parseRockspec basic parser for rockspec func parseRockspecData(reader io.Reader) (rockspec, error) { - data, err := io.ReadAll(reader) + data, err := io.ReadAll(reader) //nolint:gocritic // custom parser requires []byte if err != nil { return noReturn, err } diff --git a/syft/pkg/cataloger/nix/cataloger.go b/syft/pkg/cataloger/nix/cataloger.go index b4566787b..11dc62b32 100644 --- a/syft/pkg/cataloger/nix/cataloger.go +++ b/syft/pkg/cataloger/nix/cataloger.go @@ -46,7 +46,7 @@ func (c cataloger) Name() string { func (c cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { // always try the DB cataloger first (based off of information recorded by actions taken by nix tooling) - pkgs, rels, err := c.dbParser.catalog(resolver) + pkgs, rels, err := c.dbParser.catalog(ctx, resolver) if err != nil { return nil, nil, fmt.Errorf("failed to catalog nix packages from database: %w", err) } diff --git a/syft/pkg/cataloger/nix/db_cataloger.go b/syft/pkg/cataloger/nix/db_cataloger.go index 0405bb656..615e213ef 100644 --- a/syft/pkg/cataloger/nix/db_cataloger.go +++ b/syft/pkg/cataloger/nix/db_cataloger.go @@ -1,8 +1,9 @@ package nix import ( 
+ "bufio" + "context" "fmt" - "io" "path" "strconv" "strings" @@ -17,7 +18,7 @@ import ( const defaultSchema = 10 -type dbProcessor func(config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) +type dbProcessor func(ctx context.Context, config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) type dbCataloger struct { config Config @@ -45,7 +46,7 @@ type dbPackageEntry struct { Files []string } -func (c dbCataloger) catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { +func (c dbCataloger) catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { dbLocs, err := resolver.FilesByGlob("**/nix/var/nix/db/db.sqlite") if err != nil { return nil, nil, fmt.Errorf("failed to find Nix database: %w", err) @@ -65,7 +66,7 @@ func (c dbCataloger) catalog(resolver file.Resolver) ([]pkg.Package, []artifact. continue } - newPkgs, newRelationships, err := parser(c.config, dbLoc, resolver, c.catalogerName) + newPkgs, newRelationships, err := parser(ctx, c.config, dbLoc, resolver, c.catalogerName) if err != nil { errs = unknown.Append(errs, dbLoc.Coordinates, err) continue @@ -92,13 +93,13 @@ func (c dbCataloger) selectDBParser(dbLocation file.Location, resolver file.Reso return c.schemaProcessor[defaultSchema], 0 } - contents, err := io.ReadAll(schemaContents) - if err != nil { + scanner := bufio.NewScanner(schemaContents) + if !scanner.Scan() { log.WithFields("path", loc.RealPath).Tracef("failed to read Nix database schema file, assuming %d", defaultSchema) return c.schemaProcessor[defaultSchema], 0 } - schema, err := strconv.Atoi(strings.TrimSpace(string(contents))) + schema, err := strconv.Atoi(strings.TrimSpace(scanner.Text())) if err != nil { log.WithFields("path", loc.RealPath).Tracef("failed to parse Nix database schema file, assuming %d", defaultSchema) return c.schemaProcessor[defaultSchema], 0 diff --git a/syft/pkg/cataloger/nix/db_cataloger_v10.go b/syft/pkg/cataloger/nix/db_cataloger_v10.go index e522e6f8e..849faffa4 100644 --- a/syft/pkg/cataloger/nix/db_cataloger_v10.go +++ b/syft/pkg/cataloger/nix/db_cataloger_v10.go @@ -1,6 +1,7 @@ package nix import ( + "context" "database/sql" "fmt" "io" @@ -8,6 +9,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" @@ -15,26 +17,33 @@ import ( var _ dbProcessor = processV10DB -func processV10DB(config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) { +func processV10DB(ctx context.Context, config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) { dbContents, err := resolver.FileContentsByLocation(dbLocation) defer internal.CloseAndLogError(dbContents, dbLocation.RealPath) if err != nil { return nil, nil, fmt.Errorf("unable to read Nix database: %w", err) } - tempDB, err := createTempDB(dbContents) + td := tmpdir.FromContext(ctx) + if td == nil { + return nil, nil, fmt.Errorf("no temp dir factory in context") + } + tempDB, cleanupDB, err := createTempDB(td, dbContents) if err != nil { return nil, nil, fmt.Errorf("failed to create temporary database: %w", err) } - defer 
diff --git a/syft/pkg/cataloger/ocaml/parse_opam.go b/syft/pkg/cataloger/ocaml/parse_opam.go
index fdad51e9f..da6c16b6e 100644
--- a/syft/pkg/cataloger/ocaml/parse_opam.go
+++ b/syft/pkg/cataloger/ocaml/parse_opam.go
@@ -25,7 +25,7 @@ func parseOpamPackage(ctx context.Context, _ file.Resolver, _ *generic.Environme
 	homepageRe := regexp.MustCompile(`(?m)homepage:\s*"(?P[^"]+)"`)
 	urlRe := regexp.MustCompile(`(?m)url\s*{(?P[^}]+)}`)
 
-	data, err := io.ReadAll(reader)
+	data, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer
 	if err != nil {
 		log.WithFields("error", err).Trace("unable to read opam package")
 		return nil, nil, nil
diff --git a/syft/pkg/cataloger/php/parse_pecl_pear.go b/syft/pkg/cataloger/php/parse_pecl_pear.go
index c4af6cebe..8941da417 100644
--- a/syft/pkg/cataloger/php/parse_pecl_pear.go
+++ b/syft/pkg/cataloger/php/parse_pecl_pear.go
@@ -58,7 +58,7 @@ func parsePear(ctx context.Context, _ file.Resolver, _ *generic.Environment, rea
 
 // parsePeclPearSerialized is a parser function for Pear metadata contents, returning "Default" php packages discovered.
 func parsePeclPearSerialized(reader file.LocationReadCloser) (*peclPearData, error) {
-	data, err := io.ReadAll(reader)
+	data, err := io.ReadAll(reader) //nolint:gocritic // phpserialize requires []byte
 	if err != nil {
 		return nil, fmt.Errorf("failed to read file: %w", err)
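These parsers keep io.ReadAll (with nolint markers) because regexp matching and phpserialize need the full content as a []byte. Where an upper bound on input size is acceptable, a hedged alternative is to cap the read; the helper and limit below are illustrative, not something this diff adds:

package main

import (
	"fmt"
	"io"
	"strings"
)

// readAllBounded reads at most limit bytes from r and reports an error when the
// input is larger, instead of silently truncating or buffering without bound.
func readAllBounded(r io.Reader, limit int64) ([]byte, error) {
	data, err := io.ReadAll(io.LimitReader(r, limit+1))
	if err != nil {
		return nil, err
	}
	if int64(len(data)) > limit {
		return nil, fmt.Errorf("input exceeds %d bytes", limit)
	}
	return data, nil
}

func main() {
	data, err := readAllBounded(strings.NewReader("name: example"), 1<<20)
	fmt.Println(len(data), err) // 13 <nil>
}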
diff --git a/syft/pkg/cataloger/python/cataloger_test.go b/syft/pkg/cataloger/python/cataloger_test.go
index 9ad435a7e..3d9f64311 100644
--- a/syft/pkg/cataloger/python/cataloger_test.go
+++ b/syft/pkg/cataloger/python/cataloger_test.go
@@ -473,7 +473,7 @@ func Test_PackageCataloger_IgnorePackage(t *testing.T) {
 		t.Run(test.MetadataFixture, func(t *testing.T) {
 			resolver := file.NewMockResolverForPaths(test.MetadataFixture)
 
-			actual, _, err := NewInstalledPackageCataloger().Catalog(pkgtest.Context(), resolver)
+			actual, _, err := NewInstalledPackageCataloger().Catalog(pkgtest.Context(t), resolver)
 			require.NoError(t, err)
 
 			if len(actual) != 0 {
diff --git a/syft/pkg/cataloger/python/license.go b/syft/pkg/cataloger/python/license.go
index d9a2420fb..6cfc0f818 100644
--- a/syft/pkg/cataloger/python/license.go
+++ b/syft/pkg/cataloger/python/license.go
@@ -4,10 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"io"
 	"net/http"
 	"net/url"
-	"strings"
 	"time"
 
 	"github.com/anchore/syft/internal/cache"
@@ -100,13 +98,6 @@ func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, e
 		return "", fmt.Errorf("unable to get package from pypi registry")
 	}
 
-	bytes, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return "", fmt.Errorf("unable to parse package from pypi registry: %w", err)
-	}
-
-	dec := json.NewDecoder(strings.NewReader(string(bytes)))
-
 	// Read "license" from the response
 	var pypiResponse struct {
 		Info struct {
@@ -115,7 +106,7 @@ func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, e
 		} `json:"info"`
 	}
 
-	if err := dec.Decode(&pypiResponse); err != nil {
+	if err := json.NewDecoder(resp.Body).Decode(&pypiResponse); err != nil {
 		return "", fmt.Errorf("unable to parse license from pypi registry: %w", err)
 	}
diff --git a/syft/pkg/cataloger/python/parse_uv_lock.go b/syft/pkg/cataloger/python/parse_uv_lock.go
index 75747e9a8..5ac0b3a1c 100644
--- a/syft/pkg/cataloger/python/parse_uv_lock.go
+++ b/syft/pkg/cataloger/python/parse_uv_lock.go
@@ -143,7 +143,7 @@ func (ulp uvLockParser) uvLockPackages(ctx context.Context, reader file.Location
 	var parsedLockFileVersion uvLockFileVersion
 
 	// we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough
-	contents, err := io.ReadAll(reader)
+	contents, err := io.ReadAll(reader) //nolint:gocritic // multi-pass parse requires []byte
 	if err != nil {
 		return nil, unknown.New(reader.Location, fmt.Errorf("failed to read uv lock file: %w", err))
 	}
diff --git a/syft/pkg/cataloger/python/parse_wheel_egg.go b/syft/pkg/cataloger/python/parse_wheel_egg.go
index a66eda760..0c4cf1563 100644
--- a/syft/pkg/cataloger/python/parse_wheel_egg.go
+++ b/syft/pkg/cataloger/python/parse_wheel_egg.go
@@ -4,7 +4,6 @@ import (
 	"bufio"
 	"context"
 	"encoding/json"
-	"io"
 	"path"
 	"path/filepath"
 	"strings"
@@ -174,13 +173,8 @@ func fetchDirectURLData(resolver file.Resolver, metadataLocation file.Location)
 	}
 	defer internal.CloseAndLogError(directURLContents, directURLLocation.AccessPath)
 
-	buffer, err := io.ReadAll(directURLContents)
-	if err != nil {
-		return nil, nil, err
-	}
-
 	var directURLJson directURLOrigin
-	if err := json.Unmarshal(buffer, &directURLJson); err != nil {
+	if err := json.NewDecoder(directURLContents).Decode(&directURLJson); err != nil {
 		return nil, nil, err
 	}
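The license.go and parse_wheel_egg.go hunks replace read-all-then-unmarshal with decoding straight off the reader. A minimal sketch of the same pattern against an HTTP response (the endpoint URL and struct shape here are illustrative):

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func fetchLicense(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var payload struct {
		Info struct {
			License string `json:"license"`
		} `json:"info"`
	}
	// decode directly from the body; no intermediate []byte copy of the response is kept
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return "", err
	}
	return payload.Info.License, nil
}

func main() {
	license, err := fetchLicense("https://pypi.org/pypi/requests/2.32.3/json") // illustrative endpoint
	fmt.Println(license, err)
}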
diff --git a/syft/pkg/cataloger/redhat/parse_rpm_db.go b/syft/pkg/cataloger/redhat/parse_rpm_db.go
index 5caa3e2e3..22da431b5 100644
--- a/syft/pkg/cataloger/redhat/parse_rpm_db.go
+++ b/syft/pkg/cataloger/redhat/parse_rpm_db.go
@@ -5,11 +5,11 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"os"
 	"strings"
 
 	rpmdb "github.com/anchore/go-rpmdb/pkg"
 	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/internal/tmpdir"
 	"github.com/anchore/syft/internal/unknown"
 	"github.com/anchore/syft/syft/artifact"
 	"github.com/anchore/syft/syft/file"
@@ -22,21 +22,16 @@ import (
 //
 //nolint:funlen
 func parseRpmDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
-	f, err := os.CreateTemp("", "rpmdb")
+	td := tmpdir.FromContext(ctx)
+	if td == nil {
+		return nil, nil, fmt.Errorf("no temp dir factory in context")
+	}
+	f, cleanup, err := td.NewFile("rpmdb-*")
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to create temp rpmdb file: %w", err)
 	}
-
-	defer func() {
-		err = f.Close()
-		if err != nil {
-			log.Errorf("failed to close temp rpmdb file: %+v", err)
-		}
-		err = os.Remove(f.Name())
-		if err != nil {
-			log.Errorf("failed to remove temp rpmdb file: %+v", err)
-		}
-	}()
+	defer cleanup()
+	defer f.Close()
 
 	_, err = io.Copy(f, reader)
 	if err != nil {
diff --git a/syft/pkg/cataloger/sbom/cataloger.go b/syft/pkg/cataloger/sbom/cataloger.go
index 998fecaee..0ea8483cd 100644
--- a/syft/pkg/cataloger/sbom/cataloger.go
+++ b/syft/pkg/cataloger/sbom/cataloger.go
@@ -85,6 +85,6 @@ func adaptToReadSeeker(reader io.Reader) (io.ReadSeeker, error) {
 	log.Debug("SBOM cataloger reader is not a ReadSeeker, reading entire SBOM into memory")
 
 	var buff bytes.Buffer
-	_, err := io.Copy(&buff, reader)
+	_, err := io.Copy(&buff, reader) //nolint:gocritic // buffering to ReadSeeker
 	return bytes.NewReader(buff.Bytes()), err
 }
diff --git a/syft/pkg/cataloger/swift/parse_podfile_lock.go b/syft/pkg/cataloger/swift/parse_podfile_lock.go
index 86ab560d2..fb39c13e6 100644
--- a/syft/pkg/cataloger/swift/parse_podfile_lock.go
+++ b/syft/pkg/cataloger/swift/parse_podfile_lock.go
@@ -3,7 +3,6 @@ package swift
 import (
 	"context"
 	"fmt"
-	"io"
 	"strings"
 
 	"go.yaml.in/yaml/v3"
@@ -28,12 +27,8 @@ type podfileLock struct {
 
 // parsePodfileLock is a parser function for Podfile.lock contents, returning all cocoapods pods discovered.
 func parsePodfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
-	bytes, err := io.ReadAll(reader)
-	if err != nil {
-		return nil, nil, fmt.Errorf("unable to read file: %w", err)
-	}
 	var podfile podfileLock
-	if err = yaml.Unmarshal(bytes, &podfile); err != nil {
+	if err := yaml.NewDecoder(reader).Decode(&podfile); err != nil {
 		return nil, nil, fmt.Errorf("unable to parse yaml: %w", err)
 	}
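The sbom cataloger keeps its buffering io.Copy (with a nolint) because downstream format detection needs to seek. A hedged sketch of the adapt-or-buffer idea; the fast path and function name here are assumptions for illustration, not code from this diff:

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

// toReadSeeker returns r unchanged when it already supports seeking, and only
// falls back to buffering the full stream in memory when it does not.
func toReadSeeker(r io.Reader) (io.ReadSeeker, error) {
	if rs, ok := r.(io.ReadSeeker); ok {
		return rs, nil // e.g. *os.File, *bytes.Reader: no copy needed
	}
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, r); err != nil {
		return nil, err
	}
	return bytes.NewReader(buf.Bytes()), nil
}

func main() {
	// a MultiReader is not seekable, so it gets buffered
	nonSeekable := io.MultiReader(strings.NewReader("part1 "), strings.NewReader("part2"))
	rs, err := toReadSeeker(nonSeekable)
	if err != nil {
		panic(err)
	}
	head := make([]byte, 5)
	rs.Read(head)
	rs.Seek(0, io.SeekStart)  // rewinding is now possible
	fmt.Println(string(head)) // part1
}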
diff --git a/syft/pkg/cataloger/swipl/parse_pack.go b/syft/pkg/cataloger/swipl/parse_pack.go
index 1fcc9cf98..f4fbe5d73 100644
--- a/syft/pkg/cataloger/swipl/parse_pack.go
+++ b/syft/pkg/cataloger/swipl/parse_pack.go
@@ -20,7 +20,7 @@ func parsePackPackage(ctx context.Context, resolver file.Resolver, _ *generic.En
 	homeRe := regexp.MustCompile(`home\(\s*'([^']+)'\s*\)`)
 	authorRe := regexp.MustCompile(`(author|packager)\(\s*'([^']+)'\s*(?:,\s*'([^']+)'\s*)?\)`)
 
-	data, err := io.ReadAll(reader)
+	data, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer
 	if err != nil {
 		log.WithFields("error", err).Trace("unable to parse Rockspec app")
 		return nil, nil, nil
diff --git a/syft/pkg/cataloger/terraform/parse_tf_lock.go b/syft/pkg/cataloger/terraform/parse_tf_lock.go
index bd7091f7e..8056c8e2b 100644
--- a/syft/pkg/cataloger/terraform/parse_tf_lock.go
+++ b/syft/pkg/cataloger/terraform/parse_tf_lock.go
@@ -20,7 +20,7 @@ type terraformLockFile struct {
 func parseTerraformLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
 	var lockFile terraformLockFile
 
-	contents, err := io.ReadAll(reader)
+	contents, err := io.ReadAll(reader) //nolint:gocritic // hclsimple.Decode requires []byte
 	if err != nil {
 		return nil, nil, fmt.Errorf("failed to read terraform lock file: %w", err)
 	}
diff --git a/syft/pkg/license.go b/syft/pkg/license.go
index fa2351471..df7fe0f78 100644
--- a/syft/pkg/license.go
+++ b/syft/pkg/license.go
@@ -411,7 +411,7 @@ func (b *licenseBuilder) licenseFromContentHash(content string) License {
 }
 
 func contentFromReader(r io.Reader) (string, error) {
-	bytes, err := io.ReadAll(r)
+	bytes, err := io.ReadAll(r) //nolint:gocritic // reading license content for storage
 	if err != nil {
 		return "", err
 	}
diff --git a/syft/source/snapsource/snapcraft_api.go b/syft/source/snapsource/snapcraft_api.go
index 0acc82d46..99c816519 100644
--- a/syft/source/snapsource/snapcraft_api.go
+++ b/syft/source/snapsource/snapcraft_api.go
@@ -3,7 +3,6 @@ package snapsource
 import (
 	"encoding/json"
 	"fmt"
-	"io"
 	"net/http"
 	"regexp"
 	"strconv"
@@ -215,13 +214,8 @@ func (c *snapcraftClient) GetSnapDownloadURL(id snapIdentity) (string, error) {
 		return "", fmt.Errorf("API request failed with status code %d", resp.StatusCode)
 	}
 
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return "", fmt.Errorf("failed to read response body: %w", err)
-	}
-
 	var info snapcraftInfo
-	if err := json.Unmarshal(body, &info); err != nil {
+	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
 		return "", fmt.Errorf("failed to parse JSON response: %w", err)
 	}
@@ -259,13 +253,8 @@ func (c *snapcraftClient) CheckSnapExists(snapName string) (bool, string, error)
 		return false, "", fmt.Errorf("find API request failed with status code %d", resp.StatusCode)
 	}
 
-	body, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return false, "", fmt.Errorf("failed to read find response body: %w", err)
-	}
-
 	var findResp snapFindResponse
-	if err := json.Unmarshal(body, &findResp); err != nil {
+	if err := json.NewDecoder(resp.Body).Decode(&findResp); err != nil {
 		return false, "", fmt.Errorf("failed to parse find JSON response: %w", err)
 	}
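One behavioral nuance of the Unmarshal-to-Decoder switches above (snapcraft, pypi, direct_url.json): json.Unmarshal rejects trailing data after the first JSON value, while a Decoder simply stops reading. A small illustration, not code from this diff:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	input := `{"name":"core"} trailing garbage`

	var a, b struct {
		Name string `json:"name"`
	}

	// Unmarshal requires the input to be exactly one JSON value
	errUnmarshal := json.Unmarshal([]byte(input), &a)

	// Decode reads the first JSON value and leaves the rest of the stream untouched
	errDecode := json.NewDecoder(strings.NewReader(input)).Decode(&b)

	fmt.Println(errUnmarshal != nil) // true: "invalid character 't' after top-level value"
	fmt.Println(errDecode, b.Name)   // <nil> core
}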
if err := json.Unmarshal(body, &findResp); err != nil { + if err := json.NewDecoder(resp.Body).Decode(&findResp); err != nil { return false, "", fmt.Errorf("failed to parse find JSON response: %w", err) } diff --git a/test/rules/rules.go b/test/rules/rules.go index b102c68c7..f6f1a0e15 100644 --- a/test/rules/rules.go +++ b/test/rules/rules.go @@ -24,6 +24,50 @@ func isPtr(ctx *dsl.VarFilterContext) bool { return strings.HasPrefix(ctx.Type.String(), "*") || strings.HasPrefix(ctx.Type.Underlying().String(), "*") } +// nolint:unused +func noUnboundedReads(m dsl.Matcher) { + // flag io.ReadAll where the argument is not already wrapped in io.LimitReader + m.Match(`io.ReadAll($reader)`). + Where(!m["reader"].Text.Matches(`(?i)LimitReader|LimitedReader`)). + Report("do not use unbounded io.ReadAll; wrap the reader with io.LimitReader or use a streaming parser") + + // flag io.Copy only when the destination is an in-memory buffer + // io.Copy to files, hash writers, encoders, etc. is streaming and safe + m.Match(`io.Copy($dst, $src)`). + Where((m["dst"].Type.Is(`*bytes.Buffer`) || m["dst"].Type.Is(`*strings.Builder`)) && !m["src"].Text.Matches(`(?i)LimitReader|LimitedReader`)). + Report("do not use unbounded io.Copy to in-memory buffer; wrap the source reader with io.LimitReader") +} + +// nolint:unused +func noDirectTempFiles(m dsl.Matcher) { + // catalogers must use tmpdir.FromContext(ctx) instead of creating temp files/dirs directly, + // so that all temp storage is centrally managed and cleaned up + m.Match( + `os.CreateTemp($*_)`, + `os.MkdirTemp($*_)`, + ). + Where(m.File().PkgPath.Matches(`/cataloger/`)). + Report("do not use os.CreateTemp/os.MkdirTemp in catalogers; use tmpdir.FromContext(ctx) instead") +} + +// nolint:unused +func tmpCleanupDeferred(m dsl.Matcher) { + // ensure the cleanup function returned by NewFile/NewChild is deferred, not discarded + m.Match( + `$_, $cleanup, $err := $x.NewFile($*_); if $*_ { $*_ }; $next`, + `$_, $cleanup, $err = $x.NewFile($*_); if $*_ { $*_ }; $next`, + ). + Where(!m["next"].Text.Matches(`^defer `)). + Report("defer the cleanup function returned by NewFile immediately after the error check") + + m.Match( + `$_, $cleanup, $err := $x.NewChild($*_); if $*_ { $*_ }; $next`, + `$_, $cleanup, $err = $x.NewChild($*_); if $*_ { $*_ }; $next`, + ). + Where(!m["next"].Text.Matches(`^defer `)). + Report("defer the cleanup function returned by NewChild immediately after the error check") +} + // nolint:unused func packagesInRelationshipsAsValues(m dsl.Matcher) { m.Import("github.com/anchore/syft/syft/artifact")