chore: centralize temp files and prefer streaming IO (#4668)

* chore: centralize temp files and prefer streaming IO

Catalogers that create temp files ad-hoc can easily forget cleanup,
leaking files on disk. Similarly, io.ReadAll is convenient but risks
OOM on large or malicious inputs.

Introduce internal/tmpdir to manage all cataloger temp storage under
a single root directory with automatic cleanup. Prefer streaming
parsers (bufio.Scanner, json/yaml.NewDecoder, io.LimitReader) over
buffering entire inputs into memory. Add ruleguard rules to enforce
both practices going forward.

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* chore: go back to old release parsing

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* simplify to limit reader in version check

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* chore: regex change postponed

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* simplify supplemental release parsing to io.LimitReader

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

---------

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
This commit is contained in:
Will Murphy 2026-03-18 10:53:51 -04:00 committed by GitHub
parent a3dacf5ecd
commit e38851143e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
56 changed files with 704 additions and 214 deletions

View File

@ -107,7 +107,7 @@ func fetchLatestApplicationVersion(id clio.Identification) (*hashiVersion.Versio
return nil, fmt.Errorf("HTTP %d on fetching latest version: %s", resp.StatusCode, resp.Status)
}
versionBytes, err := io.ReadAll(resp.Body)
versionBytes, err := io.ReadAll(io.LimitReader(resp.Body, 500))
if err != nil {
return nil, fmt.Errorf("failed to read latest version: %w", err)
}

View File

@ -94,7 +94,7 @@ func getVersion(resolver file.Resolver) (string, []file.Location, error) {
}
defer internal.CloseAndLogError(reader, locations[0].RealPath)
version, err := io.ReadAll(reader)
version, err := io.ReadAll(reader) //nolint:gocritic // example code
if err != nil {
return "", nil, fmt.Errorf("unable to read alpine version: %w", err)
}
@ -117,7 +117,7 @@ func getAPKKeys(resolver file.Resolver) (map[string]string, []file.Location, err
if err != nil {
return nil, nil, fmt.Errorf("unable to resolve file contents by location at %s: %w", location.RealPath, err)
}
content, err := io.ReadAll(reader)
content, err := io.ReadAll(reader) //nolint:gocritic // example code
if err != nil {
return nil, nil, fmt.Errorf("unable to read apk key content at %s: %w", location.RealPath, err)
}

2
go.mod
View File

@ -9,7 +9,6 @@ require (
github.com/Masterminds/sprig/v3 v3.3.0
github.com/OneOfOne/xxhash v1.2.8
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d
github.com/acobaugh/osrelease v0.1.0
github.com/adrg/xdg v0.5.3
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20250319180342-2cfe4b0cb716
@ -270,6 +269,7 @@ require (
)
require (
github.com/acobaugh/osrelease v0.1.0
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be
github.com/cespare/xxhash/v2 v2.3.0
github.com/gpustack/gguf-parser-go v0.24.0

View File

@ -215,7 +215,7 @@ func write(schema []byte) {
panic(err)
}
existingSchemaBytes, err := io.ReadAll(existingFh)
existingSchemaBytes, err := io.ReadAll(existingFh) //nolint:gocritic // offline code generator
if err != nil {
panic(err)
}

View File

@ -10,7 +10,7 @@ func (s *scanner) FindEvidence(_ context.Context, reader io.Reader) (evidence []
return nil, nil, nil
}
content, err = io.ReadAll(reader)
content, err = io.ReadAll(reader) //nolint:gocritic // license scanner requires full content
if err != nil {
return nil, nil, err
}

View File

@ -31,7 +31,7 @@ func UpdateGoldenFileContents(t *testing.T, contents []byte) {
t.Log(dangerText("!!! UPDATING GOLDEN FILE !!!"), goldenFilePath)
err := os.WriteFile(goldenFilePath, contents, 0600)
err := os.WriteFile(goldenFilePath, contents, 0o600)
if err != nil {
t.Fatalf("could not update golden file (%s): %+v", goldenFilePath, err)
}
@ -50,7 +50,9 @@ func GetGoldenFileContents(t *testing.T) []byte {
}
defer f.Close()
bytes, err := io.ReadAll(f)
// suppress lint prohibiting ReadAll since golden files are source files in Syft's repository
// and not user provided artifacts.
bytes, err := io.ReadAll(f) //nolint:gocritic // golden files are trusted source files, not user artifacts
if err != nil {
t.Fatalf("could not read file (%s): %+v", goldenPath, err)
}

121
internal/tmpdir/tmpdir.go Normal file
View File

@ -0,0 +1,121 @@
package tmpdir
import (
"context"
"fmt"
"os"
"sync"
"sync/atomic"
)
type ctxKey struct{}
// Root creates a new root temp directory with the given prefix and returns a context with the
// TempDir attached. Callers should defer Cleanup() on the returned TempDir to ensure all
// temp files are removed.
func Root(ctx context.Context, prefix string) (context.Context, *TempDir) {
td := &TempDir{prefix: prefix}
return context.WithValue(ctx, ctxKey{}, td), td
}
// FromPath creates a TempDir backed by an existing directory. The caller owns the lifecycle of
// the directory; Cleanup() is a no-op. This is useful for wrapping a directory from t.TempDir()
// where the test framework handles cleanup automatically.
func FromPath(dir string) *TempDir {
td := &TempDir{}
td.root = dir
td.initOnce.Do(func() {}) // mark as initialized
return td
}
// WithValue returns a new context with the given TempDir attached. Use this to inject an
// existing TempDir into a context (e.g., sharing a TempDir across multiple test contexts).
func WithValue(ctx context.Context, td *TempDir) context.Context {
return context.WithValue(ctx, ctxKey{}, td)
}
// FromContext returns the TempDir from the context, or nil if none is set.
func FromContext(ctx context.Context) *TempDir {
td, _ := ctx.Value(ctxKey{}).(*TempDir)
return td
}
// TempDir manages a tree of temporary directories. All files and child directories live under
// a single root path that can be removed in one shot via Cleanup(). After initialization, the
// struct has no mutable state — NewChild and NewFile delegate uniqueness to os.MkdirTemp and
// os.CreateTemp respectively, so no locking is needed on the hot path.
type TempDir struct {
prefix string
root string // set exactly once by initOnce
initOnce sync.Once
initErr error
cleanupOnce sync.Once
cleaned atomic.Bool
}
func noop() {}
// path returns the root directory, lazily creating it on the first call.
func (t *TempDir) path() (string, error) {
t.initOnce.Do(func() {
t.root, t.initErr = os.MkdirTemp("", t.prefix+"-")
})
if t.initErr != nil {
return "", fmt.Errorf("failed to create root temp dir: %w", t.initErr)
}
if t.cleaned.Load() {
return "", fmt.Errorf("temp dir has been cleaned up")
}
return t.root, nil
}
// NewChild creates a named subdirectory under this TempDir. The returned cleanup function removes
// the subdirectory and all contents; callers should defer it to reclaim space early. The root
// Cleanup acts as a safety net if the per-child cleanup is missed. The cleanup function is safe
// to call multiple times and is safe to call after the root has already been cleaned up.
func (t *TempDir) NewChild(name string) (string, func(), error) {
root, err := t.path()
if err != nil {
return "", noop, err
}
dir, err := os.MkdirTemp(root, name+"-")
if err != nil {
return "", noop, fmt.Errorf("failed to create child temp dir: %w", err)
}
cleanup := func() {
_ = os.RemoveAll(dir)
}
return dir, cleanup, nil
}
// NewFile creates a new temp file under this TempDir with the given name pattern (as in os.CreateTemp).
// The caller is responsible for closing the file. The returned cleanup function removes the file;
// callers should defer it to reclaim space early. The root Cleanup acts as a safety net if the
// per-file cleanup is missed. The cleanup function is safe to call multiple times.
func (t *TempDir) NewFile(pattern string) (*os.File, func(), error) {
root, err := t.path()
if err != nil {
return nil, noop, err
}
f, err := os.CreateTemp(root, pattern)
if err != nil {
return nil, noop, fmt.Errorf("failed to create temp file: %w", err)
}
cleanup := func() {
_ = os.Remove(f.Name())
}
return f, cleanup, nil
}
// Cleanup removes the entire root directory and all contents. Safe to call multiple times.
func (t *TempDir) Cleanup() error {
var err error
t.cleanupOnce.Do(func() {
t.cleaned.Store(true)
if t.root == "" {
return
}
err = os.RemoveAll(t.root)
})
return err
}

View File

@ -0,0 +1,295 @@
package tmpdir
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRootAndFromContext verifies that Root attaches a TempDir to the context and that
// FromContext retrieves that exact instance (and yields nil when none was attached).
func TestRootAndFromContext(t *testing.T) {
	base := context.Background()
	assert.Nil(t, FromContext(base))
	withTD, td := Root(base, "test")
	got := FromContext(withTD)
	require.NotNil(t, got)
	assert.Same(t, td, got)
}

// TestWithValue verifies that an existing TempDir can be injected into a fresh context
// and remains fully usable when retrieved through FromContext.
func TestWithValue(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	// attach the already-created TempDir to a brand-new context
	fresh := WithValue(context.Background(), td)
	assert.Same(t, td, FromContext(fresh))
	// allocations through the retrieved TempDir work end to end
	file, release, err := FromContext(fresh).NewFile("with-value-*.txt")
	require.NoError(t, err)
	defer release()
	require.NoError(t, file.Close())
}
// TestNewChild verifies that repeated NewChild calls with the same name yield distinct
// sibling directories that both live directly under the shared root.
func TestNewChild(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	var dirs []string
	for i := 0; i < 2; i++ {
		dir, release, err := td.NewChild("sub")
		require.NoError(t, err)
		defer release()
		// each child exists on disk and is a directory
		info, err := os.Stat(dir)
		require.NoError(t, err)
		assert.True(t, info.IsDir())
		dirs = append(dirs, dir)
	}
	// children are distinct but share the same parent (the root)
	assert.NotEqual(t, dirs[0], dirs[1])
	assert.Equal(t, filepath.Dir(dirs[0]), filepath.Dir(dirs[1]))
}

// TestNewFile verifies that NewFile produces a writable file whose contents can be read
// back from disk by name.
func TestNewFile(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	f, release, err := td.NewFile("hello-*.txt")
	require.NoError(t, err)
	defer release()
	_, err = f.WriteString("hello")
	require.NoError(t, err)
	require.NoError(t, f.Close())
	data, err := os.ReadFile(f.Name())
	require.NoError(t, err)
	assert.Equal(t, "hello", string(data))
}
// TestCleanup verifies that root Cleanup removes every child directory and file (including
// nested content inside a child) in one shot, and that a second Cleanup call is safe.
func TestCleanup(t *testing.T) {
	_, td := Root(context.Background(), "test")
	child, _, err := td.NewChild("sub")
	require.NoError(t, err)
	f, _, err := td.NewFile("file-*")
	require.NoError(t, err)
	fname := f.Name()
	f.Close()
	// write a file inside the child dir too
	require.NoError(t, os.WriteFile(filepath.Join(child, "inner.txt"), []byte("x"), 0600))
	// everything exists
	_, err = os.Stat(child)
	require.NoError(t, err)
	_, err = os.Stat(fname)
	require.NoError(t, err)
	// cleanup
	require.NoError(t, td.Cleanup())
	// everything is gone
	_, err = os.Stat(child)
	assert.True(t, os.IsNotExist(err))
	_, err = os.Stat(fname)
	assert.True(t, os.IsNotExist(err))
	// double cleanup is safe
	require.NoError(t, td.Cleanup())
}
// TestCleanupPreventsNewAllocation verifies that once Cleanup has run, NewChild and NewFile
// fail with an error rather than silently recreating temp storage on disk.
func TestCleanupPreventsNewAllocation(t *testing.T) {
	_, td := Root(context.Background(), "test")
	require.NoError(t, td.Cleanup())
	_, _, err := td.NewChild("nope")
	assert.Error(t, err)
	_, _, err = td.NewFile("nope-*")
	assert.Error(t, err)
}
// TestEarlyCleanupFile verifies the per-file cleanup function removes the file as soon as it
// is called (before root Cleanup runs) and that calling it repeatedly is harmless.
func TestEarlyCleanupFile(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	f, cleanup, err := td.NewFile("early-*.txt")
	require.NoError(t, err)
	fname := f.Name()
	require.NoError(t, f.Close())
	// file exists before cleanup
	_, err = os.Stat(fname)
	require.NoError(t, err)
	// early cleanup removes the file
	cleanup()
	_, err = os.Stat(fname)
	assert.True(t, os.IsNotExist(err))
	// calling cleanup again is safe (idempotent)
	cleanup()
}
// TestEarlyCleanupChild verifies the same early-cleanup behavior for child directories as
// TestEarlyCleanupFile does for files: removal on first call, idempotent thereafter.
func TestEarlyCleanupChild(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	child, cleanup, err := td.NewChild("early")
	require.NoError(t, err)
	// child dir exists
	_, err = os.Stat(child)
	require.NoError(t, err)
	// early cleanup removes it
	cleanup()
	_, err = os.Stat(child)
	assert.True(t, os.IsNotExist(err))
	// calling cleanup again is safe (idempotent)
	cleanup()
}
// TestEarlyCleanupThenRootCleanup verifies that per-item early cleanup followed by root
// Cleanup succeeds without erroring on the already-removed contents.
func TestEarlyCleanupThenRootCleanup(t *testing.T) {
	_, td := Root(context.Background(), "test")
	f, cleanupFile, err := td.NewFile("combo-*.txt")
	require.NoError(t, err)
	fname := f.Name()
	f.Close()
	child, cleanupChild, err := td.NewChild("combo")
	require.NoError(t, err)
	// early cleanup both
	cleanupFile()
	cleanupChild()
	// files are already gone
	_, err = os.Stat(fname)
	assert.True(t, os.IsNotExist(err))
	_, err = os.Stat(child)
	assert.True(t, os.IsNotExist(err))
	// root cleanup still works (no error on already-removed contents)
	require.NoError(t, td.Cleanup())
}
// TestConcurrentNewChildAndNewFile exercises NewChild and NewFile from many goroutines at
// once: every allocation must succeed and every returned path must be unique, demonstrating
// that uniqueness is safely delegated to os.MkdirTemp/os.CreateTemp without extra locking.
func TestConcurrentNewChildAndNewFile(t *testing.T) {
	_, td := Root(context.Background(), "test")
	defer td.Cleanup()
	const goroutines = 20
	errs := make(chan error, goroutines)
	paths := make(chan string, goroutines)
	for i := 0; i < goroutines; i++ {
		go func(i int) {
			// alternate between directory and file allocation across goroutines
			if i%2 == 0 {
				child, cleanup, err := td.NewChild("concurrent")
				if err != nil {
					errs <- err
					return
				}
				defer cleanup()
				paths <- child
			} else {
				f, cleanup, err := td.NewFile("concurrent-*.txt")
				if err != nil {
					errs <- err
					return
				}
				defer cleanup()
				_ = f.Close()
				paths <- f.Name()
			}
			errs <- nil
		}(i)
	}
	seen := make(map[string]bool)
	// each goroutine sends its path before its nil error, so once all errors have been
	// received every successful path is already buffered and paths can be closed safely
	for i := 0; i < goroutines; i++ {
		err := <-errs
		require.NoError(t, err)
	}
	close(paths)
	for p := range paths {
		assert.False(t, seen[p], "duplicate path: %s", p)
		seen[p] = true
	}
	assert.Len(t, seen, goroutines)
}
// TestConcurrentNewChildDuringCleanup races NewChild calls against a root Cleanup running in
// another goroutine: allocations are allowed to fail once cleanup starts, but nothing may panic.
func TestConcurrentNewChildDuringCleanup(t *testing.T) {
	_, td := Root(context.Background(), "test")
	// trigger root creation
	_, cleanup, err := td.NewChild("init")
	require.NoError(t, err)
	cleanup()
	// cleanup and concurrent NewChild should not panic
	done := make(chan struct{})
	go func() {
		_ = td.Cleanup()
		close(done)
	}()
	// try creating children concurrently with cleanup — should get errors, not panics
	for i := 0; i < 10; i++ {
		_, c, _ := td.NewChild("race")
		if c != nil {
			c()
		}
	}
	<-done
}
// TestLazyCreation verifies the root directory is only materialized on the first allocation,
// not at the time Root is called.
func TestLazyCreation(t *testing.T) {
	_, td := Root(context.Background(), "test")
	// nothing exists on disk yet — root is unset until first use
	assert.Equal(t, "", td.root)
	_, _, err := td.NewFile("trigger-*")
	require.NoError(t, err)
	// the first allocation created the root
	assert.NotEmpty(t, td.root)
	require.NoError(t, td.Cleanup())
}

// TestFromPath verifies that a TempDir wrapping a caller-provided directory allocates its
// children and files directly inside that directory.
func TestFromPath(t *testing.T) {
	base := t.TempDir()
	td := FromPath(base)
	// child directories can be created...
	sub, releaseSub, err := td.NewChild("sub")
	require.NoError(t, err)
	defer releaseSub()
	assert.DirExists(t, sub)
	// ...and they land directly under the provided directory
	assert.Equal(t, base, filepath.Dir(sub))
	// files work the same way
	f, releaseFile, err := td.NewFile("file-*.txt")
	require.NoError(t, err)
	defer releaseFile()
	require.NoError(t, f.Close())
	assert.FileExists(t, f.Name())
}

View File

@ -12,8 +12,10 @@ import (
"github.com/anchore/go-sync"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/internal/task"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/event/monitor"
@ -24,6 +26,8 @@ import (
// CreateSBOM creates a software bill-of-materials from the given source. If the CreateSBOMConfig is nil, then
// default options will be used.
//
//nolint:funlen
func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (*sbom.SBOM, error) {
if cfg == nil {
cfg = DefaultCreateSBOMConfig()
@ -65,12 +69,25 @@ func CreateSBOM(ctx context.Context, src source.Source, cfg *CreateSBOMConfig) (
},
}
// setup everything we need in context: license scanner, executors, etc.
// check if the caller already provided a TempDir; if so, don't clean it up (the caller owns it)
callerOwnsTempDir := tmpdir.FromContext(ctx) != nil
// setup everything we need in context: license scanner, executors, temp storage, etc.
ctx, err = setupContext(ctx, cfg)
if err != nil {
return nil, err
}
if !callerOwnsTempDir {
if td := tmpdir.FromContext(ctx); td != nil {
defer func() {
if err := td.Cleanup(); err != nil {
log.Warnf("failed to clean up temp dir: %v", err)
}
}()
}
}
catalogingProgress := monitorCatalogingTask(src.ID(), taskGroups)
packageCatalogingProgress := monitorPackageCatalogingTask()
@ -95,6 +112,11 @@ func setupContext(ctx context.Context, cfg *CreateSBOMConfig) (context.Context,
// configure parallel executors
ctx = setContextExecutors(ctx, cfg)
// configure temp dir factory for catalogers (if not already set)
if tmpdir.FromContext(ctx) == nil {
ctx, _ = tmpdir.Root(ctx, "syft-cataloger")
}
// configure license scanner
// skip injecting a license scanner if one already set on context
if licenses.IsContextLicenseScannerSet(ctx) {

View File

@ -20,7 +20,7 @@ func SeekableReader(reader io.Reader) (io.ReadSeeker, error) {
return getOffsetReadSeeker(r)
}
content, err := io.ReadAll(reader)
content, err := io.ReadAll(reader) //nolint:gocritic // buffering non-seekable to seekable reader
if err != nil {
return nil, err
}

View File

@ -62,7 +62,7 @@ func GetUnionReader(readerCloser io.ReadCloser) (UnionReader, error) {
return newReaderAtAdapter(r), nil
}
b, err := io.ReadAll(readerCloser)
b, err := io.ReadAll(readerCloser) //nolint:gocritic // buffering non-seekable to ReaderAt
if err != nil {
return nil, fmt.Errorf("unable to read contents from binary: %w", err)
}

View File

@ -92,7 +92,7 @@ func tryParseReleaseInfo(resolver file.Resolver, location file.Location, logger
}
defer internal.CloseAndLogError(contentReader, location.AccessPath)
content, err := io.ReadAll(contentReader)
content, err := io.ReadAll(io.LimitReader(contentReader, 5*1024*1024))
if err != nil {
logger.WithFields("error", err, "path", location.RealPath).Trace("unable to read contents")
return nil

View File

@ -38,7 +38,7 @@ func readDebianVersionFile(resolver file.Resolver, location file.Location) strin
return ""
}
defer internal.CloseAndLogError(rdr, location.RealPath)
contents, err := io.ReadAll(rdr)
contents, err := io.ReadAll(io.LimitReader(rdr, 5*1024*1024))
if err != nil {
log.Debugf("error reading %s: %v", location.RealPath, err)
return ""

View File

@ -5,7 +5,6 @@ import (
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
@ -15,6 +14,7 @@ import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -24,17 +24,19 @@ import (
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func parseGGUFModel(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path())
// Create a temporary file for the library to parse
// The library requires a file path, so we create a temp file
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, nil, fmt.Errorf("no temp dir factory in context")
}
tempFile, cleanup, err := td.NewFile("syft-gguf-*.gguf")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
}
defer cleanup()
tempPath := tempFile.Name()
defer os.Remove(tempPath)
// Copy and validate the GGUF file header using LimitedReader to prevent OOM
// We use LimitedReader to cap reads at maxHeaderSize (50MB)

View File

@ -171,7 +171,7 @@ func findReleases(resolver file.Resolver, dbPath string) []linux.Release {
func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release {
var releases []linux.Release
reposB, err := io.ReadAll(reader)
reposB, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer
if err != nil {
log.Tracef("unable to read APK repositories file %q: %+v", reader.RealPath, err)
return nil

View File

@ -47,7 +47,7 @@ func node(value interface{}) erlangNode {
// parseErlang basic parser for erlang, used by rebar.lock
func parseErlang(reader io.Reader) (erlangNode, error) {
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // custom parser requires []byte
if err != nil {
return node(nil), err
}

View File

@ -3,7 +3,6 @@ package githubactions
import (
"context"
"fmt"
"io"
"go.yaml.in/yaml/v3"
@ -25,13 +24,9 @@ type compositeActionRunsDef struct {
}
func parseCompositeActionForActionUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contents, errs := io.ReadAll(reader)
if errs != nil {
return nil, nil, fmt.Errorf("unable to read yaml composite action file: %w", errs)
}
var ca compositeActionDef
if errs = yaml.Unmarshal(contents, &ca); errs != nil {
var errs error
if errs = yaml.NewDecoder(reader).Decode(&ca); errs != nil {
return nil, nil, fmt.Errorf("unable to parse yaml composite action file: %w", errs)
}

View File

@ -3,7 +3,6 @@ package githubactions
import (
"context"
"fmt"
"io"
"regexp"
"go.yaml.in/yaml/v3"
@ -41,14 +40,10 @@ type stepDef struct {
}
func parseWorkflowForWorkflowUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contents, errs := io.ReadAll(reader)
if errs != nil {
return nil, nil, fmt.Errorf("unable to read yaml workflow file: %w", errs)
}
// parse the yaml file into a generic node to preserve comments
var node yaml.Node
if errs = yaml.Unmarshal(contents, &node); errs != nil {
var errs error
if errs = yaml.NewDecoder(reader).Decode(&node); errs != nil {
return nil, nil, fmt.Errorf("unable to parse yaml workflow file: %w", errs)
}
@ -79,14 +74,10 @@ func parseWorkflowForWorkflowUsage(_ context.Context, _ file.Resolver, _ *generi
}
func parseWorkflowForActionUsage(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contents, errs := io.ReadAll(reader)
if errs != nil {
return nil, nil, fmt.Errorf("unable to read yaml workflow file: %w", errs)
}
// parse the yaml file into a generic node to preserve comments
var node yaml.Node
if errs = yaml.Unmarshal(contents, &node); errs != nil {
var errs error
if errs = yaml.NewDecoder(reader).Decode(&node); errs != nil {
return nil, nil, fmt.Errorf("unable to parse yaml workflow file: %w", errs)
}

View File

@ -23,6 +23,7 @@ import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
@ -163,7 +164,10 @@ func (c *goLicenseResolver) getLicensesFromRemote(ctx context.Context, moduleNam
return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]pkg.License, error) {
proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
urlPrefix, fsys, err := getModule(proxies, moduleName, moduleVersion)
urlPrefix, fsys, cleanup, err := getModule(ctx, proxies, moduleName, moduleVersion)
if cleanup != nil {
defer cleanup()
}
if err != nil {
return nil, err
}
@ -258,7 +262,7 @@ func processCaps(s string) string {
})
}
func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, err error) {
func getModule(ctx context.Context, proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, cleanup func(), err error) {
for _, proxy := range proxies {
u, _ := url.Parse(proxy)
if proxy == "direct" {
@ -267,7 +271,7 @@ func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix st
}
switch u.Scheme {
case "https", "http":
urlPrefix, fsys, err = getModuleProxy(proxy, moduleName, moduleVersion)
urlPrefix, fsys, cleanup, err = getModuleProxy(ctx, proxy, moduleName, moduleVersion)
case "file":
p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
urlPrefix = path.Join("file://", p) + "/"
@ -281,42 +285,78 @@ func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix st
return
}
func getModuleProxy(proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, _ error) {
// getModuleProxy downloads a Go module zip from the given proxy and returns a filesystem view of its contents.
// The returned cleanup function closes the underlying temp file and removes it; callers must not use the
// returned fs.FS after calling cleanup.
func getModuleProxy(ctx context.Context, proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, cleanup func(), _ error) {
u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
// get the module zip
log.WithFields("url", u).Info("downloading go module from proxy")
resp, err := http.Get(u) //nolint:gosec
if err != nil {
return "", nil, err
return "", nil, nil, err
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
// close the first response before retrying with a lowercased module name
_ = resp.Body.Close()
u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
// try lowercasing it; some packages have mixed casing that really messes up the proxy
resp, err = http.Get(u) //nolint:gosec
if err != nil {
return "", nil, err
return "", nil, nil, err
}
defer func() { _ = resp.Body.Close() }()
if resp.StatusCode != http.StatusOK {
return "", nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
return "", nil, nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
}
}
// read the zip
b, err := io.ReadAll(resp.Body)
// stream the zip to a temp file to avoid unbounded memory usage
td := tmpdir.FromContext(ctx)
if td == nil {
return "", nil, nil, fmt.Errorf("no temp dir factory in context")
}
tmpFile, cleanupFile, err := td.NewFile("gomodule-*.zip") //nolint:gocritic // cleanup is returned to caller, not deferred here
if err != nil {
return "", nil, err
return "", nil, nil, fmt.Errorf("failed to create temp file for module zip: %w", err)
}
out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
cleanup = func() {
_ = tmpFile.Close()
cleanupFile()
}
// cap downloads at 500MB to prevent disk exhaustion from malicious proxies
const maxModuleZipSize = 500 * 1024 * 1024
size, err := io.Copy(tmpFile, io.LimitReader(resp.Body, maxModuleZipSize))
if err != nil {
cleanup()
return "", nil, nil, fmt.Errorf("failed to download module zip: %w", err)
}
if size >= maxModuleZipSize {
cleanup()
return "", nil, nil, fmt.Errorf("module zip exceeds %d byte size limit", maxModuleZipSize)
}
if _, err := tmpFile.Seek(0, io.SeekStart); err != nil {
cleanup()
return "", nil, nil, fmt.Errorf("failed to seek module zip: %w", err)
}
out, err = zip.NewReader(tmpFile, size)
if err != nil {
cleanup()
return "", nil, nil, fmt.Errorf("failed to read module zip: %w", err)
}
versionPath := findVersionPath(out, ".")
out = getSubFS(out, versionPath)
return u + "#" + versionPath + "/", out, err
return u + "#" + versionPath + "/", out, cleanup, err
}
func findVersionPath(f fs.FS, dir string) string {

View File

@ -24,7 +24,7 @@ import (
)
func Test_LicenseSearch(t *testing.T) {
ctx := pkgtest.Context()
ctx := pkgtest.Context(t)
loc1 := file.NewLocation("github.com/someorg/somename@v0.3.2/LICENSE")
loc2 := file.NewLocation("github.com/!cap!o!r!g/!cap!project@v4.111.5/LICENSE.txt")
@ -321,7 +321,7 @@ func Test_noLocalGoModDir(t *testing.T) {
validTmp := t.TempDir()
require.NoError(t, os.MkdirAll(filepath.Join(validTmp, "mod@ver"), 0700|os.ModeDir))
ctx := pkgtest.Context()
ctx := pkgtest.Context(t)
tests := []struct {
name string
dir string

View File

@ -324,7 +324,7 @@ func buildModuleRelationships(
}
func (c *goModCataloger) parseModFileContents(reader file.LocationReadCloser) (*modfile.File, error) {
contents, err := io.ReadAll(reader)
contents, err := io.ReadAll(reader) //nolint:gocritic // modfile.Parse requires []byte
if err != nil {
return nil, fmt.Errorf("failed to read go module: %w", err)
}

View File

@ -3,7 +3,6 @@ package haskell
import (
"context"
"fmt"
"io"
"strings"
"go.yaml.in/yaml/v3"
@ -42,16 +41,11 @@ type completedSnapshot struct {
// parseStackLock is a parser function for stack.yaml.lock contents, returning all packages discovered.
func parseStackLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
bytes, err := io.ReadAll(reader)
if err != nil {
return nil, nil, fmt.Errorf("failed to load stack.yaml.lock file: %w", err)
}
var lockFile stackLock
if err := yaml.Unmarshal(bytes, &lockFile); err != nil {
if err := yaml.NewDecoder(reader).Decode(&lockFile); err != nil {
log.WithFields("error", err, "path", reader.RealPath).Trace("failed to parse stack.yaml.lock")
return nil, nil, fmt.Errorf("failed to parse stack.yaml.lock file")
return nil, nil, fmt.Errorf("failed to parse stack.yaml.lock file: %w", err)
}
var (

View File

@ -3,7 +3,6 @@ package haskell
import (
"context"
"fmt"
"io"
"go.yaml.in/yaml/v3"
@ -23,16 +22,11 @@ type stackYaml struct {
// parseStackYaml is a parser function for stack.yaml contents, returning all packages discovered.
func parseStackYaml(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
bytes, err := io.ReadAll(reader)
if err != nil {
return nil, nil, fmt.Errorf("failed to load stack.yaml file: %w", err)
}
var stackFile stackYaml
if err := yaml.Unmarshal(bytes, &stackFile); err != nil {
if err := yaml.NewDecoder(reader).Decode(&stackFile); err != nil {
log.WithFields("error", err, "path", reader.RealPath).Trace("failed to parse stack.yaml")
return nil, nil, fmt.Errorf("failed to parse stack.yaml file")
return nil, nil, fmt.Errorf("failed to parse stack.yaml file: %w", err)
}
var pkgs []pkg.Package

View File

@ -270,7 +270,7 @@ func (c *NVDAPIClient) fetchPage(ctx context.Context, startIndex int, start, end
// handleRateLimit handles HTTP 429 responses by parsing Retry-After and waiting
func (c *NVDAPIClient) handleRateLimit(ctx context.Context, httpResp *http.Response, attempt int) error {
body, _ := io.ReadAll(httpResp.Body)
body, _ := io.ReadAll(httpResp.Body) //nolint:gocritic // offline code generator
httpResp.Body.Close()
// parse Retry-After header
@ -298,7 +298,7 @@ func checkHTTPStatus(httpResp *http.Response) error {
if httpResp.StatusCode == http.StatusOK {
return nil
}
body, _ := io.ReadAll(httpResp.Body)
body, _ := io.ReadAll(httpResp.Body) //nolint:gocritic // offline code generator
httpResp.Body.Close()
return fmt.Errorf("NVD API error (status %d): %s", httpResp.StatusCode, string(body))
}

View File

@ -24,6 +24,7 @@ import (
"github.com/anchore/syft/internal/cmptest"
"github.com/anchore/syft/internal/licenses"
"github.com/anchore/syft/internal/relationship"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
@ -62,7 +63,7 @@ type CatalogTester struct {
skipTestObservations bool
}
func Context() context.Context {
func initLicenseScanner() {
once.Do(func() {
// most of the time in testing is initializing the scanner. Let's do that just once
sc := &licenses.ScannerConfig{Scanner: licensecheck.Scan, CoverageThreshold: 75}
@ -72,13 +73,20 @@ func Context() context.Context {
}
licenseScanner = &scanner
})
}
return licenses.SetContextLicenseScanner(context.Background(), *licenseScanner)
// Context returns a context with a shared license scanner and a TempDir backed by t.TempDir(),
// so cleanup is handled automatically when the test finishes.
// The license scanner is initialized once per process (see initLicenseScanner) since scanner
// setup dominates test time.
func Context(t *testing.T) context.Context {
t.Helper()
initLicenseScanner()
// back the cataloger temp-dir factory with the test's own temp dir so the
// testing framework removes all temp files when this test completes
td := tmpdir.FromPath(t.TempDir())
ctx := tmpdir.WithValue(context.Background(), td)
return licenses.SetContextLicenseScanner(ctx, *licenseScanner)
}
func NewCatalogTester() *CatalogTester {
return &CatalogTester{
context: Context(),
locationComparer: cmptest.DefaultLocationComparer,
licenseComparer: cmptest.DefaultLicenseComparer,
packageStringer: stringPackage,
@ -272,9 +280,18 @@ func (p *CatalogTester) WithoutTestObserver() *CatalogTester {
return p
}
// ensureContext returns the tester's explicitly configured context when one was set,
// otherwise it builds a fresh test context (shared license scanner plus a temp dir
// tied to the test's lifetime via Context).
func (p *CatalogTester) ensureContext(t *testing.T) context.Context {
t.Helper()
if p.context != nil {
return p.context
}
return Context(t)
}
func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) {
t.Helper()
pkgs, relationships, err := parser(p.context, p.resolver, p.env, p.reader)
ctx := p.ensureContext(t)
pkgs, relationships, err := parser(ctx, p.resolver, p.env, p.reader)
// only test for errors if explicitly requested
if p.wantErr != nil {
@ -289,10 +306,11 @@ func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) {
func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) {
t.Helper()
ctx := p.ensureContext(t)
resolver := NewObservingResolver(p.resolver)
pkgs, relationships, err := cataloger.Catalog(p.context, resolver)
pkgs, relationships, err := cataloger.Catalog(ctx, resolver)
// this is a minimum set, the resolver may return more that just this list
for _, path := range p.expectedPathResponses {

View File

@ -18,6 +18,7 @@ import (
"github.com/anchore/syft/internal"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -104,7 +105,11 @@ func newJavaArchiveParser(ctx context.Context, reader file.LocationReadCloser, d
virtualElements := strings.Split(reader.Path(), ":")
currentFilepath := virtualElements[len(virtualElements)-1]
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(currentFilepath, reader)
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, func() {}, fmt.Errorf("no temp dir factory in context")
}
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, currentFilepath, reader)
if err != nil {
return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err)
}

View File

@ -2,7 +2,6 @@ package java
import (
"bufio"
"context"
"fmt"
"io"
"os"
@ -30,7 +29,7 @@ import (
func TestSearchMavenForLicenses(t *testing.T) {
url := maventest.MockRepo(t, "internal/maven/testdata/maven-repo")
ctx := pkgtest.Context()
ctx := pkgtest.Context(t)
tests := []struct {
name string
@ -72,7 +71,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
require.NoError(t, err)
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
ap, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -81,17 +80,17 @@ func TestSearchMavenForLicenses(t *testing.T) {
require.NoError(t, err)
// assert licenses are discovered from upstream
_, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(context.Background())
_, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(pkgtest.Context(t))
require.NotNil(t, parsedPom, "expected to find pom information in the fixture")
require.NotNil(t, parsedPom.project, "expected parsedPom to have a project")
resolvedLicenses, _ := ap.maven.ResolveLicenses(context.Background(), parsedPom.project)
resolvedLicenses, _ := ap.maven.ResolveLicenses(pkgtest.Context(t), parsedPom.project)
assert.Equal(t, tc.expectedLicenses, toPkgLicenses(ctx, nil, resolvedLicenses))
})
}
}
func TestParseJar(t *testing.T) {
ctx := pkgtest.Context()
ctx := pkgtest.Context(t)
tests := []struct {
name string
fixture string
@ -372,7 +371,7 @@ func TestParseJar(t *testing.T) {
UseNetwork: false,
UseMavenLocalRepository: false,
}
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
parser, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -662,7 +661,7 @@ func TestParseNestedJar(t *testing.T) {
require.NoError(t, err)
gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{})
actual, _, err := gap.processJavaArchive(context.Background(), file.LocationReadCloser{
actual, _, err := gap.processJavaArchive(pkgtest.Context(t), file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
}, nil)
@ -1080,7 +1079,7 @@ func Test_newPackageFromMavenData(t *testing.T) {
test.expectedParent.Locations = locations
r := maven.NewResolver(nil, maven.DefaultConfig())
actualPackage := newPackageFromMavenData(context.Background(), r, test.props, test.project, test.parent, file.NewLocation(virtualPath))
actualPackage := newPackageFromMavenData(pkgtest.Context(t), r, test.props, test.project, test.parent, file.NewLocation(virtualPath))
if test.expectedPackage == nil {
require.Nil(t, actualPackage)
} else {
@ -1120,7 +1119,7 @@ func Test_artifactIDMatchesFilename(t *testing.T) {
}
func Test_parseJavaArchive_regressions(t *testing.T) {
ctx := context.TODO()
ctx := pkgtest.Context(t)
apiAll := pkg.Package{
Name: "api-all",
Version: "2.0.0",
@ -1499,7 +1498,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
fixture, err := os.Open(fixturePath)
require.NoError(t, err)
parser, cleanupFn, err := newJavaArchiveParser(context.Background(),
parser, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,
@ -1507,7 +1506,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) {
defer cleanupFn()
require.NoError(t, err)
groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(context.TODO())
groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(pkgtest.Context(t))
require.Equal(t, test.expected, maven.NewID(groupID, artifactID, version))
}()
}
@ -1634,9 +1633,9 @@ func Test_jarPomPropertyResolutionDoesNotPanic(t *testing.T) {
fixture, err := os.Open(jarName)
require.NoError(t, err)
ctx := context.TODO()
ctx := pkgtest.Context(t)
// setup parser
ap, cleanupFn, err := newJavaArchiveParser(context.Background(),
ap, cleanupFn, err := newJavaArchiveParser(pkgtest.Context(t),
file.LocationReadCloser{
Location: file.NewLocation(fixture.Name()),
ReadCloser: fixture,

View File

@ -40,7 +40,7 @@ func ParsePomXML(content io.Reader) (project *Project, err error) {
}
func getUtf8Reader(content io.Reader) (io.Reader, error) {
pomContents, err := io.ReadAll(content)
pomContents, err := io.ReadAll(content) //nolint:gocritic // charset detection requires full buffer
if err != nil {
return nil, err
}

View File

@ -463,7 +463,7 @@ func (r *Resolver) cacheResolveReader(key string, resolve func() (io.ReadCloser,
defer internal.CloseAndLogError(contentReader, key)
// store the contents to return a new reader with the same content
contents, err := io.ReadAll(contentReader)
contents, err := io.ReadAll(contentReader) //nolint:gocritic // caching requires full buffer
if err != nil {
return nil, err
}

View File

@ -1,7 +1,6 @@
package java
import (
"context"
"os"
"testing"
@ -194,7 +193,7 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) {
func Test_parsePomXMLProject(t *testing.T) {
// TODO: ideally we would have the path to the contained pom.xml, not the jar
jarLocation := file.NewLocation("path/to/archive.jar")
ctx := context.TODO()
ctx := pkgtest.Context(t)
tests := []struct {
name string
project *pkg.JavaPomProject
@ -265,11 +264,11 @@ func Test_parsePomXMLProject(t *testing.T) {
pom, err := maven.ParsePomXML(fixture)
require.NoError(t, err)
actual := newPomProject(context.Background(), r, fixture.Name(), pom)
actual := newPomProject(pkgtest.Context(t), r, fixture.Name(), pom)
assert.NoError(t, err)
assert.Equal(t, test.project, actual)
licenses, err := r.ResolveLicenses(context.Background(), pom)
licenses, err := r.ResolveLicenses(pkgtest.Context(t), pom)
//assert.NoError(t, err)
assert.Equal(t, test.licenses, toPkgLicenses(ctx, &jarLocation, licenses))
})
@ -331,7 +330,7 @@ func Test_pomParent(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := maven.NewResolver(nil, maven.DefaultConfig())
assert.Equal(t, test.expected, pomParent(context.Background(), r, &maven.Project{Parent: test.input}))
assert.Equal(t, test.expected, pomParent(pkgtest.Context(t), r, &maven.Project{Parent: test.input}))
})
}
}
@ -433,7 +432,7 @@ func Test_resolveLicenses(t *testing.T) {
fr, err := ds.FileResolver(source.AllLayersScope)
require.NoError(t, err)
ctx := context.TODO()
ctx := pkgtest.Context(t)
pkgs, _, err := cat.Catalog(ctx, fr)
require.NoError(t, err)

View File

@ -6,23 +6,16 @@ import (
"os"
"path/filepath"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
)
func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, string, func(), error) {
func saveArchiveToTmp(td *tmpdir.TempDir, archiveVirtualPath string, reader io.Reader) (string, string, func(), error) {
name := filepath.Base(archiveVirtualPath)
tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
tempDir, cleanupFn, err := td.NewChild("archive-contents") //nolint:gocritic // cleanup is returned to caller, not deferred here
if err != nil {
return "", "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
}
cleanupFn := func() {
err = os.RemoveAll(tempDir)
if err != nil {
log.Errorf("unable to cleanup archive tempdir: %+v", err)
}
}
archivePath := filepath.Join(tempDir, "archive-"+name)
contentDir := filepath.Join(tempDir, "contents")
@ -37,6 +30,9 @@ func saveArchiveToTmp(archiveVirtualPath string, reader io.Reader) (string, stri
}
defer archiveFile.Close()
// note: no size limit here — the reader comes from a file already enumerated by the
// resolver, not from an untrusted network source. The file size is bounded by the
// source image/directory being scanned.
_, err = io.Copy(archiveFile, reader)
if err != nil {
return contentDir, archivePath, cleanupFn, fmt.Errorf("unable to copy archive: %w", err)

View File

@ -5,6 +5,7 @@ import (
"fmt"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -58,7 +59,11 @@ func newGenericTarWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericTa
}
func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader)
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, nil, fmt.Errorf("no temp dir factory in context")
}
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, reader.Path(), reader)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {

View File

@ -1,7 +1,6 @@
package java
import (
"context"
"os"
"path"
"testing"
@ -50,7 +49,7 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
}
gtp := newGenericTarWrappedJavaArchiveParser(ArchiveCatalogerConfig{})
actualPkgs, _, err := gtp.parseTarWrappedJavaArchive(context.Background(), nil, nil, file.LocationReadCloser{
actualPkgs, _, err := gtp.parseTarWrappedJavaArchive(pkgtest.Context(t), nil, nil, file.LocationReadCloser{
Location: file.NewLocation(test.fixture),
ReadCloser: fixture,
})

View File

@ -5,6 +5,7 @@ import (
"fmt"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -30,7 +31,11 @@ func newGenericZipWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericZi
}
func (gzp genericZipWrappedJavaArchiveParser) parseZipWrappedJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader)
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, nil, fmt.Errorf("no temp dir factory in context")
}
contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(td, reader.Path(), reader)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {

View File

@ -1,7 +1,6 @@
package java
import (
"context"
"os"
"path"
"testing"
@ -10,6 +9,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
func Test_parseZipWrappedJavaArchive(t *testing.T) {
@ -36,7 +36,7 @@ func Test_parseZipWrappedJavaArchive(t *testing.T) {
gzp := newGenericZipWrappedJavaArchiveParser(ArchiveCatalogerConfig{})
actualPkgs, _, err := gzp.parseZipWrappedJavaArchive(context.Background(), nil, nil, file.LocationReadCloser{
actualPkgs, _, err := gzp.parseZipWrappedJavaArchive(pkgtest.Context(t), nil, nil, file.LocationReadCloser{
Location: file.NewLocation(test.fixture),
ReadCloser: fixture,
})

View File

@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
@ -257,19 +256,12 @@ func getLicenseFromNpmRegistry(baseURL, packageName, version string) (string, er
}
}()
bytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("unable to parse package from npm registry: %w", err)
}
dec := json.NewDecoder(strings.NewReader(string(bytes)))
// Read "license" from the response
var license struct {
License string `json:"license"`
}
if err := dec.Decode(&license); err != nil {
if err := json.NewDecoder(resp.Body).Decode(&license); err != nil {
return "", fmt.Errorf("unable to parse license from npm registry: %w", err)
}
@ -324,14 +316,8 @@ func parseLicensesFromLocation(l file.Location, resolver file.Resolver, pkgFile
}
defer internal.CloseAndLogError(contentReader, l.RealPath)
contents, err := io.ReadAll(contentReader)
if err != nil {
log.Debugf("error reading file contents for %s: %v", pkgFile, err)
return nil, err
}
var pkgJSON packageJSON
err = json.Unmarshal(contents, &pkgJSON)
err = json.NewDecoder(contentReader).Decode(&pkgJSON)
if err != nil {
log.Debugf("error parsing %s: %v", pkgFile, err)
return nil, err

View File

@ -179,7 +179,7 @@ func newPnpmLockfileParser(version float64) pnpmLockfileParser {
// parsePnpmLock is the main parser function for pnpm-lock.yaml files.
func (a genericPnpmLockAdapter) parsePnpmLock(ctx context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // multi-pass parse requires []byte
if err != nil {
return nil, nil, fmt.Errorf("failed to load pnpm-lock.yaml file: %w", err)
}

View File

@ -166,7 +166,8 @@ func findDevOnlyPkgs(yarnPkgs []yarnPackage, prodDeps, devDeps map[string]string
}
func parseYarnV1LockFile(reader io.ReadCloser) ([]yarnPackage, error) {
content, err := io.ReadAll(reader)
// TODO: refactor to use bufio.Scanner for streaming line-by-line parsing instead of reading the entire file
content, err := io.ReadAll(reader) //nolint:gocritic // stateful multi-line parser; candidate for streaming refactor
if err != nil {
return nil, fmt.Errorf("failed to read yarn.lock file: %w", err)
}
@ -265,7 +266,8 @@ func (a genericYarnLockAdapter) parseYarnLock(ctx context.Context, resolver file
return nil, nil, nil
}
data, err := io.ReadAll(reader)
// TODO: refactor to detect version from the first line via bufio.Scanner, then dispatch to a streaming parser
data, err := io.ReadAll(reader) //nolint:gocritic // two-pass parse: version detection then format-specific parsing
if err != nil {
return nil, nil, fmt.Errorf("failed to load yarn.lock file: %w", err)
}

View File

@ -39,7 +39,7 @@ var noReturn = rockspec{
// parseRockspec basic parser for rockspec
func parseRockspecData(reader io.Reader) (rockspec, error) {
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // custom parser requires []byte
if err != nil {
return noReturn, err
}

View File

@ -46,7 +46,7 @@ func (c cataloger) Name() string {
func (c cataloger) Catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
// always try the DB cataloger first (based off of information recorded by actions taken by nix tooling)
pkgs, rels, err := c.dbParser.catalog(resolver)
pkgs, rels, err := c.dbParser.catalog(ctx, resolver)
if err != nil {
return nil, nil, fmt.Errorf("failed to catalog nix packages from database: %w", err)
}

View File

@ -1,8 +1,9 @@
package nix
import (
"bufio"
"context"
"fmt"
"io"
"path"
"strconv"
"strings"
@ -17,7 +18,7 @@ import (
const defaultSchema = 10
type dbProcessor func(config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error)
type dbProcessor func(ctx context.Context, config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error)
type dbCataloger struct {
config Config
@ -45,7 +46,7 @@ type dbPackageEntry struct {
Files []string
}
func (c dbCataloger) catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
func (c dbCataloger) catalog(ctx context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
dbLocs, err := resolver.FilesByGlob("**/nix/var/nix/db/db.sqlite")
if err != nil {
return nil, nil, fmt.Errorf("failed to find Nix database: %w", err)
@ -65,7 +66,7 @@ func (c dbCataloger) catalog(resolver file.Resolver) ([]pkg.Package, []artifact.
continue
}
newPkgs, newRelationships, err := parser(c.config, dbLoc, resolver, c.catalogerName)
newPkgs, newRelationships, err := parser(ctx, c.config, dbLoc, resolver, c.catalogerName)
if err != nil {
errs = unknown.Append(errs, dbLoc.Coordinates, err)
continue
@ -92,13 +93,13 @@ func (c dbCataloger) selectDBParser(dbLocation file.Location, resolver file.Reso
return c.schemaProcessor[defaultSchema], 0
}
contents, err := io.ReadAll(schemaContents)
if err != nil {
scanner := bufio.NewScanner(schemaContents)
if !scanner.Scan() {
log.WithFields("path", loc.RealPath).Tracef("failed to read Nix database schema file, assuming %d", defaultSchema)
return c.schemaProcessor[defaultSchema], 0
}
schema, err := strconv.Atoi(strings.TrimSpace(string(contents)))
schema, err := strconv.Atoi(strings.TrimSpace(scanner.Text()))
if err != nil {
log.WithFields("path", loc.RealPath).Tracef("failed to parse Nix database schema file, assuming %d", defaultSchema)
return c.schemaProcessor[defaultSchema], 0

View File

@ -1,6 +1,7 @@
package nix
import (
"context"
"database/sql"
"fmt"
"io"
@ -8,6 +9,7 @@ import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -15,26 +17,33 @@ import (
var _ dbProcessor = processV10DB
func processV10DB(config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) {
func processV10DB(ctx context.Context, config Config, dbLocation file.Location, resolver file.Resolver, catalogerName string) ([]pkg.Package, []artifact.Relationship, error) {
dbContents, err := resolver.FileContentsByLocation(dbLocation)
defer internal.CloseAndLogError(dbContents, dbLocation.RealPath)
if err != nil {
return nil, nil, fmt.Errorf("unable to read Nix database: %w", err)
}
tempDB, err := createTempDB(dbContents)
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, nil, fmt.Errorf("no temp dir factory in context")
}
tempDB, cleanupDB, err := createTempDB(td, dbContents)
if err != nil {
return nil, nil, fmt.Errorf("failed to create temporary database: %w", err)
}
defer os.RemoveAll(tempDB.Name())
// defer order is LIFO: cleanupDB (remove file) must run after db.Close and tempDB.Close
defer cleanupDB()
// close order is LIFO: db.Close() (SQLite conn) → tempDB.Close() (file handle) → cleanupDB (remove file)
defer tempDB.Close()
db, err := sql.Open("sqlite", tempDB.Name())
if err != nil {
return nil, nil, fmt.Errorf("failed to open database: %w", err)
}
db.SetConnMaxLifetime(0)
defer db.Close()
db.SetConnMaxLifetime(0)
packageEntries, err := extractV10DBPackages(config, db, dbLocation, resolver)
if err != nil {
@ -232,18 +241,20 @@ func finalizeV10DBResults(db *sql.DB, packageEntries map[int]*dbPackageEntry, ca
return pkgs, relationships, nil
}
func createTempDB(content io.ReadCloser) (*os.File, error) {
tempFile, err := os.CreateTemp("", "nix-db-*.sqlite")
func createTempDB(td *tmpdir.TempDir, content io.ReadCloser) (*os.File, func(), error) {
noop := func() {}
tempFile, cleanup, err := td.NewFile("nix-db-*.sqlite") //nolint:gocritic // cleanup is returned to caller, not deferred here
if err != nil {
return nil, err
return nil, noop, err
}
_, err = io.Copy(tempFile, content)
if err != nil {
tempFile.Close()
os.Remove(tempFile.Name())
return nil, err
cleanup()
return nil, noop, err
}
return tempFile, nil
return tempFile, cleanup, nil
}

View File

@ -25,7 +25,7 @@ func parseOpamPackage(ctx context.Context, _ file.Resolver, _ *generic.Environme
homepageRe := regexp.MustCompile(`(?m)homepage:\s*"(?P<url>[^"]+)"`)
urlRe := regexp.MustCompile(`(?m)url\s*{(?P<url>[^}]+)}`)
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer
if err != nil {
log.WithFields("error", err).Trace("unable to read opam package")
return nil, nil, nil

View File

@ -58,7 +58,7 @@ func parsePear(ctx context.Context, _ file.Resolver, _ *generic.Environment, rea
// parsePeclPearSerialized is a parser function for Pear metadata contents, returning "Default" php packages discovered.
func parsePeclPearSerialized(reader file.LocationReadCloser) (*peclPearData, error) {
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // phpserialize requires []byte
if err != nil {
return nil, fmt.Errorf("failed to read file: %w", err)

View File

@ -473,7 +473,7 @@ func Test_PackageCataloger_IgnorePackage(t *testing.T) {
t.Run(test.MetadataFixture, func(t *testing.T) {
resolver := file.NewMockResolverForPaths(test.MetadataFixture)
actual, _, err := NewInstalledPackageCataloger().Catalog(pkgtest.Context(), resolver)
actual, _, err := NewInstalledPackageCataloger().Catalog(pkgtest.Context(t), resolver)
require.NoError(t, err)
if len(actual) != 0 {

View File

@ -4,10 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/anchore/syft/internal/cache"
@ -100,13 +98,6 @@ func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, e
return "", fmt.Errorf("unable to get package from pypi registry")
}
bytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("unable to parse package from pypi registry: %w", err)
}
dec := json.NewDecoder(strings.NewReader(string(bytes)))
// Read "license" from the response
var pypiResponse struct {
Info struct {
@ -115,7 +106,7 @@ func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, e
} `json:"info"`
}
if err := dec.Decode(&pypiResponse); err != nil {
if err := json.NewDecoder(resp.Body).Decode(&pypiResponse); err != nil {
return "", fmt.Errorf("unable to parse license from pypi registry: %w", err)
}

View File

@ -143,7 +143,7 @@ func (ulp uvLockParser) uvLockPackages(ctx context.Context, reader file.Location
var parsedLockFileVersion uvLockFileVersion
// we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough
contents, err := io.ReadAll(reader)
contents, err := io.ReadAll(reader) //nolint:gocritic // multi-pass parse requires []byte
if err != nil {
return nil, unknown.New(reader.Location, fmt.Errorf("failed to read uv lock file: %w", err))
}

View File

@ -4,7 +4,6 @@ import (
"bufio"
"context"
"encoding/json"
"io"
"path"
"path/filepath"
"strings"
@ -174,13 +173,8 @@ func fetchDirectURLData(resolver file.Resolver, metadataLocation file.Location)
}
defer internal.CloseAndLogError(directURLContents, directURLLocation.AccessPath)
buffer, err := io.ReadAll(directURLContents)
if err != nil {
return nil, nil, err
}
var directURLJson directURLOrigin
if err := json.Unmarshal(buffer, &directURLJson); err != nil {
if err := json.NewDecoder(directURLContents).Decode(&directURLJson); err != nil {
return nil, nil, err
}

View File

@ -5,11 +5,11 @@ import (
"errors"
"fmt"
"io"
"os"
"strings"
rpmdb "github.com/anchore/go-rpmdb/pkg"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -22,21 +22,16 @@ import (
//
//nolint:funlen
func parseRpmDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
f, err := os.CreateTemp("", "rpmdb")
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, nil, fmt.Errorf("no temp dir factory in context")
}
f, cleanup, err := td.NewFile("rpmdb-*")
if err != nil {
return nil, nil, fmt.Errorf("failed to create temp rpmdb file: %w", err)
}
defer func() {
err = f.Close()
if err != nil {
log.Errorf("failed to close temp rpmdb file: %+v", err)
}
err = os.Remove(f.Name())
if err != nil {
log.Errorf("failed to remove temp rpmdb file: %+v", err)
}
}()
defer cleanup()
defer f.Close()
_, err = io.Copy(f, reader)
if err != nil {

View File

@ -85,6 +85,6 @@ func adaptToReadSeeker(reader io.Reader) (io.ReadSeeker, error) {
log.Debug("SBOM cataloger reader is not a ReadSeeker, reading entire SBOM into memory")
var buff bytes.Buffer
_, err := io.Copy(&buff, reader)
_, err := io.Copy(&buff, reader) //nolint:gocritic // buffering to ReadSeeker
return bytes.NewReader(buff.Bytes()), err
}

View File

@ -3,7 +3,6 @@ package swift
import (
"context"
"fmt"
"io"
"strings"
"go.yaml.in/yaml/v3"
@ -28,12 +27,8 @@ type podfileLock struct {
// parsePodfileLock is a parser function for Podfile.lock contents, returning all cocoapods pods discovered.
func parsePodfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
bytes, err := io.ReadAll(reader)
if err != nil {
return nil, nil, fmt.Errorf("unable to read file: %w", err)
}
var podfile podfileLock
if err = yaml.Unmarshal(bytes, &podfile); err != nil {
if err := yaml.NewDecoder(reader).Decode(&podfile); err != nil {
return nil, nil, fmt.Errorf("unable to parse yaml: %w", err)
}

View File

@ -20,7 +20,7 @@ func parsePackPackage(ctx context.Context, resolver file.Resolver, _ *generic.En
homeRe := regexp.MustCompile(`home\(\s*'([^']+)'\s*\)`)
authorRe := regexp.MustCompile(`(author|packager)\(\s*'([^']+)'\s*(?:,\s*'([^']+)'\s*)?\)`)
data, err := io.ReadAll(reader)
data, err := io.ReadAll(reader) //nolint:gocritic // regex matching requires full buffer
if err != nil {
log.WithFields("error", err).Trace("unable to parse Rockspec app")
return nil, nil, nil

View File

@ -20,7 +20,7 @@ type terraformLockFile struct {
func parseTerraformLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var lockFile terraformLockFile
contents, err := io.ReadAll(reader)
contents, err := io.ReadAll(reader) //nolint:gocritic // hclsimple.Decode requires []byte
if err != nil {
return nil, nil, fmt.Errorf("failed to read terraform lock file: %w", err)
}

View File

@ -411,7 +411,7 @@ func (b *licenseBuilder) licenseFromContentHash(content string) License {
}
func contentFromReader(r io.Reader) (string, error) {
bytes, err := io.ReadAll(r)
bytes, err := io.ReadAll(r) //nolint:gocritic // reading license content for storage
if err != nil {
return "", err
}

View File

@ -3,7 +3,6 @@ package snapsource
import (
"encoding/json"
"fmt"
"io"
"net/http"
"regexp"
"strconv"
@ -215,13 +214,8 @@ func (c *snapcraftClient) GetSnapDownloadURL(id snapIdentity) (string, error) {
return "", fmt.Errorf("API request failed with status code %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %w", err)
}
var info snapcraftInfo
if err := json.Unmarshal(body, &info); err != nil {
if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
return "", fmt.Errorf("failed to parse JSON response: %w", err)
}
@ -259,13 +253,8 @@ func (c *snapcraftClient) CheckSnapExists(snapName string) (bool, string, error)
return false, "", fmt.Errorf("find API request failed with status code %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return false, "", fmt.Errorf("failed to read find response body: %w", err)
}
var findResp snapFindResponse
if err := json.Unmarshal(body, &findResp); err != nil {
if err := json.NewDecoder(resp.Body).Decode(&findResp); err != nil {
return false, "", fmt.Errorf("failed to parse find JSON response: %w", err)
}

View File

@ -24,6 +24,50 @@ func isPtr(ctx *dsl.VarFilterContext) bool {
return strings.HasPrefix(ctx.Type.String(), "*") || strings.HasPrefix(ctx.Type.Underlying().String(), "*")
}
// nolint:unused
// noUnboundedReads is a ruleguard rule that forbids buffering an entire,
// potentially unbounded input into memory; see the matched patterns below.
func noUnboundedReads(m dsl.Matcher) {
// flag io.ReadAll where the argument is not already wrapped in io.LimitReader
// (heuristic text match: any reader expression mentioning LimitReader/LimitedReader passes)
m.Match(`io.ReadAll($reader)`).
Where(!m["reader"].Text.Matches(`(?i)LimitReader|LimitedReader`)).
Report("do not use unbounded io.ReadAll; wrap the reader with io.LimitReader or use a streaming parser")
// flag io.Copy only when the destination is an in-memory buffer
// io.Copy to files, hash writers, encoders, etc. is streaming and safe
m.Match(`io.Copy($dst, $src)`).
Where((m["dst"].Type.Is(`*bytes.Buffer`) || m["dst"].Type.Is(`*strings.Builder`)) && !m["src"].Text.Matches(`(?i)LimitReader|LimitedReader`)).
Report("do not use unbounded io.Copy to in-memory buffer; wrap the source reader with io.LimitReader")
}
// nolint:unused
// noDirectTempFiles is a ruleguard rule that bans ad-hoc temp file/dir creation
// inside cataloger packages; other packages are unaffected by the PkgPath filter.
func noDirectTempFiles(m dsl.Matcher) {
// catalogers must use tmpdir.FromContext(ctx) instead of creating temp files/dirs directly,
// so that all temp storage is centrally managed and cleaned up
m.Match(
`os.CreateTemp($*_)`,
`os.MkdirTemp($*_)`,
).
Where(m.File().PkgPath.Matches(`/cataloger/`)).
Report("do not use os.CreateTemp/os.MkdirTemp in catalogers; use tmpdir.FromContext(ctx) instead")
}
// nolint:unused
// tmpCleanupDeferred is a ruleguard rule that checks the cleanup func returned by
// tmpdir's NewFile/NewChild is deferred immediately after the error check.
// Each pattern matches three statements — the assignment, the following
// error-check if, and the next statement — and flags when that next statement
// does not begin with `defer`.
func tmpCleanupDeferred(m dsl.Matcher) {
// ensure the cleanup function returned by NewFile/NewChild is deferred, not discarded
m.Match(
`$_, $cleanup, $err := $x.NewFile($*_); if $*_ { $*_ }; $next`,
`$_, $cleanup, $err = $x.NewFile($*_); if $*_ { $*_ }; $next`,
).
Where(!m["next"].Text.Matches(`^defer `)).
Report("defer the cleanup function returned by NewFile immediately after the error check")
m.Match(
`$_, $cleanup, $err := $x.NewChild($*_); if $*_ { $*_ }; $next`,
`$_, $cleanup, $err = $x.NewChild($*_); if $*_ { $*_ }; $next`,
).
Where(!m["next"].Text.Matches(`^defer `)).
Report("defer the cleanup function returned by NewChild immediately after the error check")
}
// nolint:unused
func packagesInRelationshipsAsValues(m dsl.Matcher) {
m.Import("github.com/anchore/syft/syft/artifact")