fix: name safetensors models even when the run has a partial parse error

safeTensorsMergeProcessor used to early-return whenever it was handed a
non-nil error. Returning early left them nameless, and
missing-name compliance then silently dropped them, so a single unparseable
file could erase every otherwise-valid model from the SBOM. The processor
now always names/drops and propagates the error instead.

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-06-30 15:26:22 -04:00
parent 7e482a26c6
commit cf0c2c2dd6
No known key found for this signature in database
2 changed files with 20 additions and 8 deletions

View File

@ -600,12 +600,19 @@ spdx-id: Apache-2.0
assertHasLicense(t, out[0], "Apache-2.0")
})
t.Run("passes through upstream error", func(t *testing.T) {
t.Run("names packages and still propagates an upstream error", func(t *testing.T) {
// A non-nil upstream error (e.g. another file in the run failed to parse)
// must NOT short-circuit naming. The package is still nameless at this
// point; skipping the merge would let it flow downstream where missing-name
// compliance silently drops it, turning one bad file into the loss of every
// valid model. So we name/drop as usual and pass the error through.
sentinel := assert.AnError
p := dirPkg("/models/x/y.safetensors", pkg.SafeTensorsModelInfo{Format: "safetensors", MetadataHash: "h"})
out, _, err := safeTensorsMergeProcessor(context.Background(), nil, []pkg.Package{p}, nil, sentinel)
assert.Equal(t, sentinel, err)
assert.Equal(t, []pkg.Package{p}, out)
resolver := file.NewMockResolverForPaths() // no config.json -> parent directory base name
out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, sentinel)
assert.Equal(t, sentinel, err, "the upstream error is still propagated")
require.Len(t, out, 1)
assert.Equal(t, "x", out[0].Name, "the model is still named (from its parent directory) despite the error")
})
}

View File

@ -16,20 +16,25 @@ import (
// them per model, merges the per-shard metadata, resolves a name + licenses, and
// drops any model it cannot name.
func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
if err != nil || len(pkgs) == 0 {
if len(pkgs) == 0 {
return pkgs, rels, err
}
// keep the processor robust if non-safetensors packages ever flow through
// Note: we do NOT early-return when err != nil. A non-nil err here means some
// file in the run failed to parse, but the successfully-parsed packages are
// still nameless until this processor names or drops them. Skipping that work
// would let those nameless packages flow downstream, where they are silently
// dropped by missing-name compliance — turning one bad file into the loss of
// every otherwise-valid model. So we always name/drop and propagate err.
stPkgs, other := partitionSafeTensorsPackages(pkgs)
if len(stPkgs) == 0 {
return pkgs, rels, err
}
if fromOCIArtifact(stPkgs) {
return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, nil
return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, err
}
return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, nil
return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, err
}
// partitionSafeTensorsPackages separates safetensors packages from anything else