mirror of
https://github.com/anchore/syft.git
synced 2026-07-04 18:18:26 +02:00
fix: name safetensors models even when the run has a partial parse error
safeTensorsMergeProcessor used to early-return whenever it was handed a non-nil error. Returning early left them nameless, and missing-name compliance then silently dropped them, so a single unparseable file could erase every otherwise-valid model from the SBOM. The processor now always names/drops and propagates the error instead. Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
7e482a26c6
commit
cf0c2c2dd6
@ -600,12 +600,19 @@ spdx-id: Apache-2.0
|
||||
assertHasLicense(t, out[0], "Apache-2.0")
|
||||
})
|
||||
|
||||
t.Run("passes through upstream error", func(t *testing.T) {
|
||||
t.Run("names packages and still propagates an upstream error", func(t *testing.T) {
|
||||
// A non-nil upstream error (e.g. another file in the run failed to parse)
|
||||
// must NOT short-circuit naming. The package is still nameless at this
|
||||
// point; skipping the merge would let it flow downstream where missing-name
|
||||
// compliance silently drops it, turning one bad file into the loss of every
|
||||
// valid model. So we name/drop as usual and pass the error through.
|
||||
sentinel := assert.AnError
|
||||
p := dirPkg("/models/x/y.safetensors", pkg.SafeTensorsModelInfo{Format: "safetensors", MetadataHash: "h"})
|
||||
out, _, err := safeTensorsMergeProcessor(context.Background(), nil, []pkg.Package{p}, nil, sentinel)
|
||||
assert.Equal(t, sentinel, err)
|
||||
assert.Equal(t, []pkg.Package{p}, out)
|
||||
resolver := file.NewMockResolverForPaths() // no config.json -> parent directory base name
|
||||
out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, sentinel)
|
||||
assert.Equal(t, sentinel, err, "the upstream error is still propagated")
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "x", out[0].Name, "the model is still named (from its parent directory) despite the error")
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@ -16,20 +16,25 @@ import (
|
||||
// them per model, merges the per-shard metadata, resolves a name + licenses, and
|
||||
// drops any model it cannot name.
|
||||
func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
if err != nil || len(pkgs) == 0 {
|
||||
if len(pkgs) == 0 {
|
||||
return pkgs, rels, err
|
||||
}
|
||||
|
||||
// keep the processor robust if non-safetensors packages ever flow through
|
||||
// Note: we do NOT early-return when err != nil. A non-nil err here means some
|
||||
// file in the run failed to parse, but the successfully-parsed packages are
|
||||
// still nameless until this processor names or drops them. Skipping that work
|
||||
// would let those nameless packages flow downstream, where they are silently
|
||||
// dropped by missing-name compliance — turning one bad file into the loss of
|
||||
// every otherwise-valid model. So we always name/drop and propagate err.
|
||||
stPkgs, other := partitionSafeTensorsPackages(pkgs)
|
||||
if len(stPkgs) == 0 {
|
||||
return pkgs, rels, err
|
||||
}
|
||||
|
||||
if fromOCIArtifact(stPkgs) {
|
||||
return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, nil
|
||||
return append(other, mergeOCIModel(ctx, resolver, stPkgs)...), rels, err
|
||||
}
|
||||
return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, nil
|
||||
return append(other, mergeDirModels(ctx, resolver, stPkgs)...), rels, err
|
||||
}
|
||||
|
||||
// partitionSafeTensorsPackages separates safetensors packages from anything else
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user