From cf0c2c2dd652771c76612311872391446e447d23 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Tue, 30 Jun 2026 15:26:22 -0400 Subject: [PATCH] fix: name safetensors models even when the run has a partial parse error safeTensorsMergeProcessor used to early-return whenever it was handed a non-nil error. Returning early left them nameless, and missing-name compliance then silently dropped them, so a single unparseable file could erase every otherwise-valid model from the SBOM. The processor now always names/drops and propagates the error instead. Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- syft/pkg/cataloger/ai/parse_safetensors_test.go | 15 +++++++++++---- syft/pkg/cataloger/ai/processor.go | 13 +++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/syft/pkg/cataloger/ai/parse_safetensors_test.go b/syft/pkg/cataloger/ai/parse_safetensors_test.go index 14cfa7762..f47d472be 100644 --- a/syft/pkg/cataloger/ai/parse_safetensors_test.go +++ b/syft/pkg/cataloger/ai/parse_safetensors_test.go @@ -600,12 +600,19 @@ spdx-id: Apache-2.0 assertHasLicense(t, out[0], "Apache-2.0") }) - t.Run("passes through upstream error", func(t *testing.T) { + t.Run("names packages and still propagates an upstream error", func(t *testing.T) { + // A non-nil upstream error (e.g. another file in the run failed to parse) + // must NOT short-circuit naming. The package is still nameless at this + // point; skipping the merge would let it flow downstream where missing-name + // compliance silently drops it, turning one bad file into the loss of every + // valid model. So we name/drop as usual and pass the error through. sentinel := assert.AnError p := dirPkg("/models/x/y.safetensors", pkg.SafeTensorsModelInfo{Format: "safetensors", MetadataHash: "h"}) - out, _, err := safeTensorsMergeProcessor(context.Background(), nil, []pkg.Package{p}, nil, sentinel) - assert.Equal(t, sentinel, err) - assert.Equal(t, []pkg.Package{p}, out) + resolver := file.NewMockResolverForPaths() // no config.json -> parent directory base name + out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, sentinel) + assert.Equal(t, sentinel, err, "the upstream error is still propagated") + require.Len(t, out, 1) + assert.Equal(t, "x", out[0].Name, "the model is still named (from its parent directory) despite the error") }) } diff --git a/syft/pkg/cataloger/ai/processor.go b/syft/pkg/cataloger/ai/processor.go index c534eb885..fd0e29292 100644 --- a/syft/pkg/cataloger/ai/processor.go +++ b/syft/pkg/cataloger/ai/processor.go @@ -16,20 +16,25 @@ import ( // them per model, merges the per-shard metadata, resolves a name + licenses, and // drops any model it cannot name. func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) { - if err != nil || len(pkgs) == 0 { + if len(pkgs) == 0 { return pkgs, rels, err } - // keep the processor robust if non-safetensors packages ever flow through + // Note: we do NOT early-return when err != nil. A non-nil err here means some + // file in the run failed to parse, but the successfully-parsed packages are + // still nameless until this processor names or drops them. Skipping that work + // would let those nameless packages flow downstream, where they are silently + // dropped by missing-name compliance — turning one bad file into the loss of + // every otherwise-valid model. So we always name/drop and propagate err. stPkgs, other := partitionSafeTensorsPackages(pkgs) if len(stPkgs) == 0 { return pkgs, rels, err } if fromOCIArtifact(stPkgs) { - return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, nil + return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, err } - return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, nil + return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, err } // partitionSafeTensorsPackages separates safetensors packages from anything else