test: test cleanup

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2026-07-05 02:28:25 +02:00 · 2026-06-01 21:49:15 -04:00 · 2026-06-01 21:49:15 -04:00 · 4352ac4691
commit 4352ac4691
parent e88d6d019e
4 changed files with 481 additions and 354 deletions
--- a/syft/pkg/cataloger/ai/parse_safetensors_test.go
+++ b/syft/pkg/cataloger/ai/parse_safetensors_test.go
@ -13,6 +13,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/anchore/syft/syft/artifact"
 	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@ -39,7 +40,7 @@ func buildSafeTensorsFile(t *testing.T, metadata map[string]string, tensors map[
 	return out
 }
-func TestSafeTensorsCataloger_singleFile(t *testing.T) {
+func TestSafeTensorsCataloger(t *testing.T) {
 	userMeta := map[string]string{"format": "pt"}
 	tensors := map[string]safeTensorsEntry{
 		"model.embed.weight": {DType: "BF16", Shape: []int64{1000, 16}, DataOffsets: []int64{0, 32000}},
@ -49,43 +50,61 @@ func TestSafeTensorsCataloger_singleFile(t *testing.T) {
 	// cataloger wires the header hash through to the package metadata.
 	wantHash := (&safeTensorsHeader{metadata: userMeta, tensors: tensors}).metadataHash()
-	dir := t.TempDir()
+	tests := []struct {
-	modelDir := filepath.Join(dir, "models")
+		name                  string
-	require.NoError(t, os.MkdirAll(modelDir, 0o755))
+		setup                 func(t *testing.T) string
-	require.NoError(t, os.WriteFile(filepath.Join(modelDir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
+		expectedPackages      []pkg.Package
-	require.NoError(t, os.WriteFile(filepath.Join(modelDir, "config.json"),
+		expectedRelationships []artifact.Relationship
-		[]byte(`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`), 0o644))
+	}{
 	require.NoError(t, os.WriteFile(filepath.Join(modelDir, "README.md"),
 		[]byte("---\nlicense: Apache-2.0\nbase_model:\n  - meta-llama/Llama-3\n---\n# Llama 3\n"), 0o644))
 	expected := []pkg.Package{
 		{
-			Name: "Llama-3-8B",
+			name: "single-file model directory with config.json and README",
-			Type: pkg.ModelPkg,
+			setup: func(t *testing.T) string {
-			Licenses: pkg.NewLicenseSet(
+				dir := t.TempDir()
-				pkg.NewLicenseFromFields("Apache-2.0", "", nil),
+				modelDir := filepath.Join(dir, "models")
-			),
+				require.NoError(t, os.MkdirAll(modelDir, 0o755))
-			Metadata: pkg.SafeTensorsModelInfo{
+				require.NoError(t, os.WriteFile(filepath.Join(modelDir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
-				Format:              "safetensors",
+				require.NoError(t, os.WriteFile(filepath.Join(modelDir, "config.json"),
-				Architecture:        "LlamaForCausalLM",
+					[]byte(`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`), 0o644))
-				Quantization:        "BF16",
+				require.NoError(t, os.WriteFile(filepath.Join(modelDir, "README.md"),
-				Parameters:          "16.26K",
+					[]byte("---\nlicense: Apache-2.0\nbase_model:\n  - meta-llama/Llama-3\n---\n# Llama 3\n"), 0o644))
-				TensorCount:         2,
+				return dir
-				TorchDtype:          "bfloat16",
+			},
-				TransformersVersion: "4.40.0",
+			expectedPackages: []pkg.Package{
-				ShardCount:          1,
+				{
-				UserMetadata:        pkg.KeyValues{{Key: "format", Value: "pt"}},
+					Name: "Llama-3-8B",
-				MetadataHash:        wantHash,
+					Type: pkg.ModelPkg,
 					Licenses: pkg.NewLicenseSet(
 						pkg.NewLicenseFromFields("Apache-2.0", "", nil),
 					),
 					Metadata: pkg.SafeTensorsModelInfo{
 						Format:              "safetensors",
 						Architecture:        "LlamaForCausalLM",
 						Quantization:        "BF16",
 						Parameters:          "16.26K",
 						TensorCount:         2,
 						TorchDtype:          "bfloat16",
 						TransformersVersion: "4.40.0",
 						ShardCount:          1,
 						UserMetadata:        pkg.KeyValues{{Key: "format", Value: "pt"}},
 						MetadataHash:        wantHash,
 					},
 				},
 			},
 		},
 	}
-	pkgtest.NewCatalogTester().
+	for _, tt := range tests {
-		FromDirectory(t, dir).
+		t.Run(tt.name, func(t *testing.T) {
-		Expects(expected, nil).
+			fixtureDir := tt.setup(t)
-		IgnoreLocationLayer().
+
-		IgnorePackageFields("FoundBy", "Locations").
+			pkgtest.NewCatalogTester().
-		TestCataloger(t, NewSafeTensorsCataloger())
+				FromDirectory(t, fixtureDir).
 				Expects(tt.expectedPackages, tt.expectedRelationships).
 				IgnoreLocationLayer().
 				IgnorePackageFields("FoundBy", "Locations").
 				TestCataloger(t, NewSafeTensorsCataloger())
 		})
 	}
 }
 // TestParseSafeTensorsOCIConfig covers the parser in isolation: it should emit
@ -94,31 +113,48 @@ func TestSafeTensorsCataloger_singleFile(t *testing.T) {
 // artifact. Naming and license resolution happen in the merge processor and are
 // tested separately under TestSafeTensorsMergeProcessor.
 func TestParseSafeTensorsOCIConfig(t *testing.T) {
-	t.Run("emits a nameless package with config-blob fields", func(t *testing.T) {
+	tests := []struct {
-		blob := []byte(`{"config":{"format":"safetensors","quantization":"Q4_K_M","parameters":"8B","size":"16.00GB","safetensors":{"tensor_count":291}}}`)
+		name             string
 		blob             string
 		expectedPackages []pkg.Package // nil => parser must emit nothing
 	}{
 		{
 			name: "emits a nameless package with config-blob fields",
 			blob: `{"config":{"format":"safetensors","quantization":"Q4_K_M","parameters":"8B","size":"16.00GB","safetensors":{"tensor_count":291}}}`,
 			expectedPackages: []pkg.Package{
 				{
 					// nameless: the merge processor assigns the name and resolves
 					// licenses. Config blobs carry no header content, so
 					// MetadataHash stays empty.
 					Type: pkg.ModelPkg,
 					Metadata: pkg.SafeTensorsModelInfo{
 						Format:       "safetensors",
 						Quantization: "Q4_K_M",
 						Parameters:   "8B",
 						TotalSize:    "16.00GB",
 						TensorCount:  291,
 					},
 				},
 			},
 		},
 		{
 			// non-safetensors formats emit nothing so the GGUF cataloger can claim
 			// the artifact.
 			name:             "ignores non-safetensors format",
 			blob:             `{"config":{"format":"gguf","quantization":"Q4_K_M"}}`,
 			expectedPackages: nil,
 		},
 	}
-		pkgs, _, err := parseSafeTensorsOCIConfig(context.Background(), nil, nil, configReader(blob))
+	for _, tt := range tests {
-		require.NoError(t, err)
+		t.Run(tt.name, func(t *testing.T) {
-		require.Len(t, pkgs, 1)
+			pkgtest.NewCatalogTester().
-
+				FromString("/config.json", tt.blob).
-		p := pkgs[0]
+				Expects(tt.expectedPackages, nil).
-		assert.Empty(t, p.Name, "config-blob parser must emit nameless; the merge processor names it")
+				IgnorePackageFields("FoundBy", "Locations").
-		assert.Empty(t, p.Licenses.ToSlice(), "license resolution belongs to the merge processor")
+				TestParser(t, parseSafeTensorsOCIConfig)
-		md := p.Metadata.(pkg.SafeTensorsModelInfo)
+		})
-		assert.Equal(t, "safetensors", md.Format)
+	}
 		assert.Equal(t, "Q4_K_M", md.Quantization)
 		assert.Equal(t, "8B", md.Parameters)
 		assert.Equal(t, "16.00GB", md.TotalSize)
 		assert.Equal(t, uint64(291), md.TensorCount)
 		assert.Empty(t, md.MetadataHash, "config blobs have no header content to hash")
 	})
 	t.Run("ignores non-safetensors format", func(t *testing.T) {
 		ggufBlob := []byte(`{"config":{"format":"gguf","quantization":"Q4_K_M"}}`)
 		pkgs, _, err := parseSafeTensorsOCIConfig(context.Background(), nil, nil, configReader(ggufBlob))
 		require.NoError(t, err)
 		assert.Empty(t, pkgs)
 	})
 }
 // TestSafeTensorsMergeProcessor exercises the merge processor directly with
@ -147,10 +183,10 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
 		}
 	}
-	t.Run("dir scan: dropped when no sibling config.json carries _name_or_path", func(t *testing.T) {
+	t.Run("dir scan: parent directory base name names the group when no config.json is present", func(t *testing.T) {
-		// Without a config.json the dir-scan path has no name source. There is
+		// Without a config.json the dir-scan path falls through to the
-		// intentionally no parent-dir fallback (or any opaque fallback), so the
+		// parent directory base name. A Hugging Face-style model dir is named after the
-		// group is dropped rather than named after the filesystem layout.
+		// model, so "/models/tiny-llama/weights.safetensors" → "tiny-llama".
 		p := dirPkg("/models/tiny-llama/weights.safetensors", pkg.SafeTensorsModelInfo{
 			Format:       "safetensors",
 			TensorCount:  4,
@ -160,33 +196,46 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
 		resolver := file.NewMockResolverForPaths() // no config.json / README available
 		out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, nil)
 		require.NoError(t, err)
-		assert.Empty(t, out, "dir-scan group with no config.json must be dropped")
+		require.Len(t, out, 1)
 		assert.Equal(t, "tiny-llama", out[0].Name, "rung 2: parent directory base name")
 	})
-	t.Run("dir scan: Architecture-Parameters alone does not name the package", func(t *testing.T) {
+	t.Run("dir scan: nested model dirs group and name by immediate parent", func(t *testing.T) {
-		// Even with rich content-derived metadata (Architecture + Parameters),
+		top := dirPkg("/namea/1.safetensors", pkg.SafeTensorsModelInfo{
-		// the package must be dropped when there is no producer-declared name.
+			Format: "safetensors", TensorCount: 1, MetadataHash: "aaaa",
-		// The Arch-Params synthetic rung was removed because it produced labels
+		})
-		// like "LlamaForCausalLM-2.68B" that SBOM consumers couldn't trace back
+		nested := dirPkg("/namea/nameb/2.safetensors", pkg.SafeTensorsModelInfo{
-		// to a recognizable model.
+			Format: "safetensors", TensorCount: 1, MetadataHash: "bbbb",
 		p := dirPkg("/models/tiny/weights.safetensors", pkg.SafeTensorsModelInfo{
 			Format:       "safetensors",
 			Architecture: "LlamaForCausalLM",
 			Parameters:   "2.68B",
 			TensorCount:  4,
 			MetadataHash: "abc",
 		})
 		resolver := file.NewMockResolverForPaths()
 		out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{top, nested}, nil, nil)
 		require.NoError(t, err)
 		require.Len(t, out, 2)
 		names := []string{out[0].Name, out[1].Name}
 		assert.ElementsMatch(t, []string{"namea", "nameb"}, names)
 	})
 	t.Run("dir scan: config.json _name_or_path beats the parent directory fallback", func(t *testing.T) {
 		// A sibling config.json that carries _name_or_path takes precedence over the parent directory name.
 		dir := t.TempDir()
 		require.NoError(t, os.WriteFile(filepath.Join(dir, "config.json"),
 			[]byte(`{"_name_or_path":"org/preferred-name"}`), 0o644))
 		stPath := filepath.Join(dir, "weights.safetensors")
 		p := dirPkg(stPath, pkg.SafeTensorsModelInfo{
 			Format: "safetensors", TensorCount: 1, MetadataHash: "abc",
 		})
 		resolver := file.NewMockResolverForPaths(filepath.Join(dir, "config.json"))
 		out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, nil)
 		require.NoError(t, err)
-		assert.Empty(t, out, "Arch-Params alone is not a name source")
+		require.Len(t, out, 1)
 		assert.Equal(t, "preferred-name", out[0].Name, "rung 1 (config.json) wins over rung 2 (parent dir)")
 	})
 	t.Run("OCI: dropped when no name source is available", func(t *testing.T) {
 		// The vllm-style shape: config-blob package + a weight-layer package,
 		// both at virtual path "/", no model.file companions on the resolver
-		// AND no image ref. With nothing to derive a name from, the group is
+		// AND no image ref. With nothing to derive a name from, the package is
-		// dropped — no opaque fallback.
+		// dropped.
 		configMd := pkg.SafeTensorsModelInfo{
 			Format:      "safetensors",
 			TensorCount: 5,
@ -209,9 +258,7 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
 	t.Run("OCI: image-ref last segment names the group when config.json is absent", func(t *testing.T) {
 		// vllm-style artifact: a repacked model whose embedded config.json has
-		// been stripped of _name_or_path. The merge processor falls through to
+		// been stripped of _name_or_path. Naming falls through to
 		// the second rung — the image-reference last segment — so we still emit
 		// a recognizable model name instead of dropping it.
 		configMd := pkg.SafeTensorsModelInfo{
 			Format:      "safetensors",
 			TensorCount: 290,
@ -235,75 +282,6 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
 		assert.Equal(t, "smollm2-vllm", out[0].Name, "rung 2: image-ref repository basename")
 	})
 	t.Run("OCI: config.json _name_or_path beats the image-ref fallback", func(t *testing.T) {
 		// When the embedded config.json carries _name_or_path, rung 1 wins over
 		// the image ref even if both are present.
 		dir := t.TempDir()
 		hfConfigPath := filepath.Join(dir, "config.json")
 		require.NoError(t, os.WriteFile(hfConfigPath,
 			[]byte(`{"_name_or_path":"org/preferred-name"}`), 0o644))
 		resolver := file.NewMockResolverForOCIArtifact(
 			"docker.io/ai/smollm2-vllm:360M",
 			map[string][]file.Location{
 				dockerAIModelFileMediaType: {file.NewLocation(hfConfigPath)},
 			},
 		)
 		configMd := pkg.SafeTensorsModelInfo{Format: "safetensors", TensorCount: 1}
 		out, _, err := safeTensorsMergeProcessor(
 			context.Background(), resolver,
 			[]pkg.Package{ociPkg(configMd)}, nil, nil,
 		)
 		require.NoError(t, err)
 		require.Len(t, out, 1)
 		assert.Equal(t, "preferred-name", out[0].Name, "rung 1 (config.json) wins over rung 2 (image ref)")
 	})
 	t.Run("OCI: merges config + shard and names from companion config.json", func(t *testing.T) {
 		// Write a single model.file companion blob containing HF config.json so
 		// the processor can derive _name_or_path and Architecture from it.
 		dir := t.TempDir()
 		hfConfigPath := filepath.Join(dir, "config.json")
 		require.NoError(t, os.WriteFile(hfConfigPath,
 			[]byte(`{"architectures":["Qwen3ForCausalLM"],"torch_dtype":"bfloat16","_name_or_path":"org/qwen-tiny"}`), 0o644))
 		resolver := file.NewMockResolverForMediaTypes(map[string][]file.Location{
 			dockerAIModelFileMediaType: {file.NewLocation(hfConfigPath)},
 		})
 		configMd := pkg.SafeTensorsModelInfo{
 			Format:       "safetensors",
 			Quantization: "Q4_K_M", // raw producer-declared value
 			Parameters:   "8B",
 			TotalSize:    "16.00GB",
 			TensorCount:  291,
 		}
 		shardMd := pkg.SafeTensorsModelInfo{
 			Format:       "safetensors",
 			TensorCount:  100, // per-shard count — must NOT be summed onto the aggregate's 291
 			Quantization: "BF16",
 			MetadataHash: "deadbeef",
 			UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
 		}
 		out, _, err := safeTensorsMergeProcessor(
 			context.Background(), resolver,
 			[]pkg.Package{ociPkg(configMd), ociPkg(shardMd)}, nil, nil,
 		)
 		require.NoError(t, err)
 		require.Len(t, out, 1)
 		got := out[0]
 		assert.Equal(t, "qwen-tiny", got.Name, "name comes from path.Base(_name_or_path)")
 		md := got.Metadata.(pkg.SafeTensorsModelInfo)
 		assert.Equal(t, uint64(291), md.TensorCount, "aggregate TensorCount must win — never double-count by summing the shard")
 		assert.Equal(t, "16.00GB", md.TotalSize)
 		assert.Equal(t, "8B", md.Parameters)
 		assert.Equal(t, "Qwen3ForCausalLM", md.Architecture, "Architecture enriched from companion config.json")
 		assert.Equal(t, "bfloat16", md.TorchDtype)
 		assert.Equal(t, "Q4_K_M", md.Quantization, "aggregate Quantization wins over shard's normalized dtype when both present")
 		assert.Equal(t, "deadbeef", md.MetadataHash, "single-shard rollup is the lone shard's hash")
 		assert.Equal(t, pkg.KeyValues{{Key: "format", Value: "pt"}}, md.UserMetadata)
 		assert.Nil(t, md.Parts, "single-shard groups skip Parts; the outer view already exposes everything")
 	})
 	t.Run("OCI: multi-shard rollup hashes are stable and sorted", func(t *testing.T) {
 		dir := t.TempDir()
 		hfConfigPath := filepath.Join(dir, "config.json")
@ -389,20 +367,21 @@ spdx-id: Apache-2.0
 //
 // Precedence (highest → lowest):
 //  1. config.json _name_or_path  (path.Base applied; both dir-scan and OCI)
-//  2. OCI image-ref last segment (OCI only — empty string for dir scans)
+//  2. fallback name — OCI image-ref last segment, or dir-scan parent directory
 //     base name (the merge processor computes the right one per group)
 //     → drop (empty name) when nothing matches
 func TestSafeTensorsNamingPrecedence(t *testing.T) {
-	cases := []struct {
+	tests := []struct {
 		name         string
 		nameOrPath   string
-		imageRefName string
+		fallbackName string
 		want         string
 	}{
 		// rung 1
 		{
-			name:         "rung 1: _name_or_path beats the image-ref fallback",
+			name:         "rung 1: _name_or_path beats the fallback",
 			nameOrPath:   "org/MyModel",
-			imageRefName: "fallback-ref",
+			fallbackName: "fallback-name",
 			want:         "MyModel",
 		},
 		{
@ -418,10 +397,15 @@ func TestSafeTensorsNamingPrecedence(t *testing.T) {
 		// rung 2
 		{
-			name:         "rung 2: image-ref last segment used when _name_or_path is empty",
+			name:         "rung 2: OCI image-ref last segment used when _name_or_path is empty",
-			imageRefName: "smollm2-vllm",
+			fallbackName: "smollm2-vllm",
 			want:         "smollm2-vllm",
 		},
 		{
 			name:         "rung 2: dir-scan parent directory name used when _name_or_path is empty",
 			fallbackName: "tiny-llama",
 			want:         "tiny-llama",
 		},
 		// drops
 		{
@ -430,10 +414,31 @@ func TestSafeTensorsNamingPrecedence(t *testing.T) {
 		},
 	}
-	for _, tc := range cases {
+	for _, tt := range tests {
-		t.Run(tc.name, func(t *testing.T) {
+		t.Run(tt.name, func(t *testing.T) {
-			got := pickSafeTensorsName(tc.nameOrPath, tc.imageRefName)
+			got := pickSafeTensorsName(tt.nameOrPath, tt.fallbackName)
-			assert.Equal(t, tc.want, got)
+			assert.Equal(t, tt.want, got)
 		})
 	}
 }
 // TestSafeTensorsDirName covers the directory-scan fallback name derivation,
 // including the degenerate roots that must yield no name.
 func TestSafeTensorsDirName(t *testing.T) {
 	tests := []struct {
 		groupKey string
 		want     string
 	}{
 		{groupKey: "/models/tiny-llama", want: "tiny-llama"},
 		{groupKey: "/namea", want: "namea"},
 		{groupKey: "/namea/nameb", want: "nameb"},
 		{groupKey: "/", want: ""},
 		{groupKey: ".", want: ""},
 		{groupKey: "", want: ""},
 	}
 	for _, tt := range tests {
 		t.Run(tt.groupKey, func(t *testing.T) {
 			assert.Equal(t, tt.want, safeTensorsDirName(tt.groupKey))
 		})
 	}
 }
@ -449,19 +454,26 @@ func TestParseSafeTensorsOCILayer(t *testing.T) {
 	wantHash := (&safeTensorsHeader{metadata: userMeta, tensors: tensors}).metadataHash()
 	t.Run("emits a nameless package with header-derived metadata", func(t *testing.T) {
-		reader := file.NewLocationReadCloser(file.NewLocation("/"), io.NopCloser(bytes.NewReader(blob)))
+		// nameless: the merge processor assigns the name. Parameters is the
-		pkgs, _, err := parseSafeTensorsOCILayer(context.Background(), nil, nil, reader)
+		// summed element count of the two tensors (1024*16 + 16*16 = 16640).
-		require.NoError(t, err)
+		expected := []pkg.Package{
-		require.Len(t, pkgs, 1)
+			{
-
+				Type: pkg.ModelPkg,
-		p := pkgs[0]
+				Metadata: pkg.SafeTensorsModelInfo{
-		assert.Empty(t, p.Name, "weight-layer parser must emit nameless; the merge processor names it")
+					Format:       "safetensors",
-		md := p.Metadata.(pkg.SafeTensorsModelInfo)
+					Parameters:   "16.64K",
-		assert.Equal(t, "safetensors", md.Format)
+					Quantization: "BF16",
-		assert.Equal(t, uint64(2), md.TensorCount)
+					TensorCount:  2,
-		assert.Equal(t, "BF16", md.Quantization)
+					UserMetadata: wantUserMetadata,
-		assert.Equal(t, wantUserMetadata, md.UserMetadata)
+					MetadataHash: wantHash,
-		assert.Equal(t, wantHash, md.MetadataHash)
+				},
 			},
 		}
 		pkgtest.NewCatalogTester().
 			FromString("/", string(blob)).
 			Expects(expected, nil).
 			IgnorePackageFields("FoundBy", "Locations").
 			TestParser(t, parseSafeTensorsOCILayer)
 	})
 	t.Run("merged via processor: aggregate fields preserved, hash lifted from single shard", func(t *testing.T) {
@ -535,27 +547,30 @@ func TestParseSafeTensorsOCILayer(t *testing.T) {
 // Locking in the field values guards against changes to the header parser
 // silently breaking on real-world content shape.
 func TestParseSafeTensorsOCILayer_realFixture(t *testing.T) {
-	data, err := os.ReadFile(filepath.Join("testdata", "safetensors", "nomic-embed-475M.header.safetensors"))
+	// nameless before the merge processor runs. The fixture is immutable on disk;
-	require.NoError(t, err)
+	// the locked field values (notably MetadataHash) guard against the header
-	require.Greater(t, len(data), 8, "fixture must include the 8-byte length prefix")
+	// parser silently breaking on real-world content shape — if MetadataHash
 	// changes, either the hash algorithm or the canonicalization changed, both of
 	// which callers may rely on for cross-source identity.
 	expected := []pkg.Package{
 		{
 			Type: pkg.ModelPkg,
 			Metadata: pkg.SafeTensorsModelInfo{
 				Format:       "safetensors",
 				Parameters:   "475.29M",
 				Quantization: "F32", // every tensor in the captured shard is F32
 				TensorCount:  148,   // nomic-embed-v2-moe 475M ships 148 tensor entries in this shard
 				UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
 				MetadataHash: "051a14e686673dea",
 			},
 		},
 	}
-	reader := file.NewLocationReadCloser(file.NewLocation("/"), io.NopCloser(bytes.NewReader(data)))
+	pkgtest.NewCatalogTester().
-	pkgs, _, err := parseSafeTensorsOCILayer(context.Background(), nil, nil, reader)
+		FromFile(t, filepath.Join("testdata", "safetensors", "nomic-embed-475M.header.safetensors")).
-	require.NoError(t, err)
+		Expects(expected, nil).
-	require.Len(t, pkgs, 1)
+		IgnorePackageFields("FoundBy", "Locations").
-	assert.Empty(t, pkgs[0].Name, "weight-layer packages are nameless before the merge processor runs")
+		TestParser(t, parseSafeTensorsOCILayer)
 	md := pkgs[0].Metadata.(pkg.SafeTensorsModelInfo)
 	assert.Equal(t, "safetensors", md.Format)
 	assert.Equal(t, uint64(148), md.TensorCount, "nomic-embed-v2-moe 475M ships 148 tensor entries in this shard")
 	assert.Equal(t, "F32", md.Quantization, "every tensor in the captured shard is F32")
 	assert.Equal(t, "475.29M", md.Parameters)
 	assert.Equal(t, pkg.KeyValues{{Key: "format", Value: "pt"}}, md.UserMetadata)
 	// MetadataHash is locked to the exact value the parser produces for this
 	// captured input. The fixture is immutable on disk; if this value changes
 	// either the hash algorithm or the canonicalization changed, both of which
 	// callers may rely on for cross-source identity.
 	assert.Equal(t, "051a14e686673dea", md.MetadataHash)
 }
 func TestSafeTensorsCrossSourceHashParity(t *testing.T) {
@ -595,10 +610,6 @@ func TestSafeTensorsCrossSourceHashParity(t *testing.T) {
 	assert.Equal(t, dirHash, ociHash, "same content via dir scan and OCI weight-layer scan must hash equal")
 }
 func configReader(blob []byte) file.LocationReadCloser {
 	return file.NewLocationReadCloser(file.NewLocation("/config.json"), io.NopCloser(bytes.NewReader(blob)))
 }
 func assertHasLicense(t *testing.T, p pkg.Package, value string) {
 	t.Helper()
 	for _, l := range p.Licenses.ToSlice() {
@ -610,28 +621,50 @@ func assertHasLicense(t *testing.T, p pkg.Package, value string) {
 }
 func TestReadSafeTensorsHeader(t *testing.T) {
-	t.Run("valid header", func(t *testing.T) {
+	zeroLength := make([]byte, 8) // length prefix of 0
-		data := buildSafeTensorsFile(t, map[string]string{"format": "pt"}, map[string]safeTensorsEntry{
+
-			"w": {DType: "F32", Shape: []int64{2, 2}, DataOffsets: []int64{0, 16}},
+	truncatedBody := make([]byte, 8)
 	binary.LittleEndian.PutUint64(truncatedBody, 100) // claims 100 bytes but supplies none
 	tests := []struct {
 		name    string
 		data    []byte
 		wantErr bool
 		assert  func(t *testing.T, h *safeTensorsHeader)
 	}{
 		{
 			name: "valid header",
 			data: buildSafeTensorsFile(t, map[string]string{"format": "pt"}, map[string]safeTensorsEntry{
 				"w": {DType: "F32", Shape: []int64{2, 2}, DataOffsets: []int64{0, 16}},
 			}),
 			assert: func(t *testing.T, h *safeTensorsHeader) {
 				assert.Len(t, h.tensors, 1)
 				assert.Equal(t, "pt", h.metadata["format"])
 			},
 		},
 		{
 			name:    "zero-length header",
 			data:    zeroLength,
 			wantErr: true,
 		},
 		{
 			name:    "truncated body",
 			data:    truncatedBody,
 			wantErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			h, err := readSafeTensorsHeader(bytes.NewReader(tt.data))
 			if tt.wantErr {
 				require.Error(t, err)
 				return
 			}
 			require.NoError(t, err)
 			tt.assert(t, h)
 		})
-		h, err := readSafeTensorsHeader(bytes.NewReader(data))
+	}
 		require.NoError(t, err)
 		assert.Len(t, h.tensors, 1)
 		assert.Equal(t, "pt", h.metadata["format"])
 	})
 	t.Run("zero-length header", func(t *testing.T) {
 		var buf [8]byte // length prefix of 0
 		_, err := readSafeTensorsHeader(bytes.NewReader(buf[:]))
 		require.Error(t, err)
 	})
 	t.Run("truncated body", func(t *testing.T) {
 		var buf [8]byte
 		binary.LittleEndian.PutUint64(buf[:], 100) // claims 100 bytes but supplies none
 		_, err := readSafeTensorsHeader(bytes.NewReader(buf[:]))
 		require.Error(t, err)
 	})
 }
 func TestSafeTensorsHeader_metadataHash(t *testing.T) {
@ -680,71 +713,112 @@ func TestSafeTensorsHeader_parameterCountAndDType(t *testing.T) {
 }
 func TestNormalizeDType(t *testing.T) {
-	cases := map[string]string{
+	tests := []struct {
-		"BF16":    "BF16",
+		name string
-		"float16": "F16",
+		in   string
-		"FP32":    "F32",
+		want string
-		"int8":    "I8",
+	}{
-		"U8":      "U8",
+		{name: "already canonical BF16", in: "BF16", want: "BF16"},
-		"bool":    "BOOL",
+		{name: "float16 alias", in: "float16", want: "F16"},
-		"weird":   "WEIRD",
+		{name: "FP32 alias", in: "FP32", want: "F32"},
 		{name: "int8 alias", in: "int8", want: "I8"},
 		{name: "U8 passthrough", in: "U8", want: "U8"},
 		{name: "bool", in: "bool", want: "BOOL"},
 		{name: "unknown value uppercased", in: "weird", want: "WEIRD"},
 	}
-	for in, want := range cases {
+	for _, tt := range tests {
-		assert.Equalf(t, want, normalizeDType(in), "normalizeDType(%q)", in)
+		t.Run(tt.name, func(t *testing.T) {
 			assert.Equal(t, tt.want, normalizeDType(tt.in))
 		})
 	}
 }
 func TestFormatParameterCount(t *testing.T) {
-	cases := map[uint64]string{
+	tests := []struct {
-		512:           "512",
+		name string
-		16256:         "16.26K",
+		in   uint64
-		2_680_000_000: "2.68B",
+		want string
-		35_000_000:    "35.00M",
+	}{
 		{name: "raw count under 1K", in: 512, want: "512"},
 		{name: "thousands", in: 16256, want: "16.26K"},
 		{name: "billions", in: 2_680_000_000, want: "2.68B"},
 		{name: "millions", in: 35_000_000, want: "35.00M"},
 	}
-	for in, want := range cases {
+	for _, tt := range tests {
-		assert.Equalf(t, want, formatParameterCount(in), "formatParameterCount(%d)", in)
+		t.Run(tt.name, func(t *testing.T) {
 			assert.Equal(t, tt.want, formatParameterCount(tt.in))
 		})
 	}
 }
 func TestParseFrontmatter(t *testing.T) {
-	t.Run("list base_model", func(t *testing.T) {
+	tests := []struct {
-		fm := parseFrontmatter([]byte("---\nlicense: mit\nbase_model:\n  - org/Model\n---\nbody"))
+		name          string
-		require.NotNil(t, fm)
+		input         string
-		assert.Equal(t, "mit", fm.License)
+		wantNil       bool
-		assert.Equal(t, []string{"org/Model"}, fm.BaseModel)
+		wantLicense   string
-	})
+		wantBaseModel []string
 	}{
 		{
 			name:          "list base_model",
 			input:         "---\nlicense: mit\nbase_model:\n  - org/Model\n---\nbody",
 			wantLicense:   "mit",
 			wantBaseModel: []string{"org/Model"},
 		},
 		{
 			name:          "scalar base_model",
 			input:         "---\nlicense: apache-2.0\nbase_model: org/Model\n---\n",
 			wantLicense:   "apache-2.0",
 			wantBaseModel: []string{"org/Model"},
 		},
 		{
 			name:        "leading BOM",
 			input:       "\xef\xbb\xbf---\nlicense: mit\n---\n",
 			wantLicense: "mit",
 		},
 		{
 			name:    "no frontmatter",
 			input:   "# just a heading\n",
 			wantNil: true,
 		},
 		{
 			name:    "unterminated frontmatter",
 			input:   "---\nlicense: mit\n",
 			wantNil: true,
 		},
 	}
-	t.Run("scalar base_model", func(t *testing.T) {
+	for _, tt := range tests {
-		fm := parseFrontmatter([]byte("---\nlicense: apache-2.0\nbase_model: org/Model\n---\n"))
+		t.Run(tt.name, func(t *testing.T) {
-		require.NotNil(t, fm)
+			fm := parseFrontmatter([]byte(tt.input))
-		assert.Equal(t, "apache-2.0", fm.License)
+			if tt.wantNil {
-		assert.Equal(t, []string{"org/Model"}, fm.BaseModel)
+				assert.Nil(t, fm)
-	})
+				return
-
+			}
-	t.Run("leading BOM", func(t *testing.T) {
+			require.NotNil(t, fm)
-		fm := parseFrontmatter([]byte("\xef\xbb\xbf---\nlicense: mit\n---\n"))
+			assert.Equal(t, tt.wantLicense, fm.License)
-		require.NotNil(t, fm)
+			if tt.wantBaseModel != nil {
-		assert.Equal(t, "mit", fm.License)
+				assert.Equal(t, tt.wantBaseModel, fm.BaseModel)
-	})
+			}
-
+		})
-	t.Run("no frontmatter", func(t *testing.T) {
+	}
 		assert.Nil(t, parseFrontmatter([]byte("# just a heading\n")))
 	})
 	t.Run("unterminated frontmatter", func(t *testing.T) {
 		assert.Nil(t, parseFrontmatter([]byte("---\nlicense: mit\n")))
 	})
 }
 // TestParseLicenseFrontmatter covers the choosealicense.com-style YAML
 // frontmatter Docker Model Runner uses for its license layers. Only spdx-id
 // is consumed; everything else in the block is ignored.
 func TestParseLicenseFrontmatter(t *testing.T) {
-	t.Run("Apache-2.0 (the canonical choosealicense.com shape)", func(t *testing.T) {
+	// The Apache-2.0 case is the exact frontmatter shape from
-		// This is the exact frontmatter shape from
+	// https://github.com/github/choosealicense.com/blob/gh-pages/_licenses/apache-2.0.txt
-		// https://github.com/github/choosealicense.com/blob/gh-pages/_licenses/apache-2.0.txt
+	// Docker AI license layers ship a near-identical block.
-		// Docker AI license layers ship a near-identical block.
+	tests := []struct {
-		buf := []byte(`---
+		name  string
 		input string
 		want  string
 	}{
 		{
 			name: "Apache-2.0 (the canonical choosealicense.com shape)",
 			input: `---
 title: Apache License 2.0
 spdx-id: Apache-2.0
 redirect_from: /licenses/apache/
@ -780,29 +854,36 @@ limitations:
                                 Apache License
                           Version 2.0, January 2004
-`)
+`,
-		assert.Equal(t, "Apache-2.0", parseLicenseFrontmatter(buf))
+			want: "Apache-2.0",
-	})
+		},
 		{
 			name:  "MIT with BOM prefix",
 			input: "\xef\xbb\xbf---\ntitle: MIT License\nspdx-id: MIT\n---\nThe MIT License...\n",
 			want:  "MIT",
 		},
 		{
 			name:  "frontmatter without spdx-id falls through (returns empty)",
 			input: "---\ntitle: Something\ndescription: no spdx-id here\n---\nbody\n",
 			want:  "",
 		},
 		{
 			name:  "plain license text without any frontmatter",
 			input: "                                 Apache License\n                           Version 2.0, January 2004\n",
 			want:  "",
 		},
 		{
 			name:  "unterminated frontmatter block",
 			input: "---\nspdx-id: MIT\n(never closes)\n",
 			want:  "",
 		},
 	}
-	t.Run("MIT with BOM prefix", func(t *testing.T) {
+	for _, tt := range tests {
-		buf := []byte("\xef\xbb\xbf---\ntitle: MIT License\nspdx-id: MIT\n---\nThe MIT License...\n")
+		t.Run(tt.name, func(t *testing.T) {
-		assert.Equal(t, "MIT", parseLicenseFrontmatter(buf))
+			assert.Equal(t, tt.want, parseLicenseFrontmatter([]byte(tt.input)))
-	})
+		})
-
+	}
 	t.Run("frontmatter without spdx-id falls through (returns empty)", func(t *testing.T) {
 		buf := []byte("---\ntitle: Something\ndescription: no spdx-id here\n---\nbody\n")
 		assert.Empty(t, parseLicenseFrontmatter(buf))
 	})
 	t.Run("plain license text without any frontmatter", func(t *testing.T) {
 		buf := []byte("                                 Apache License\n                           Version 2.0, January 2004\n")
 		assert.Empty(t, parseLicenseFrontmatter(buf))
 	})
 	t.Run("unterminated frontmatter block", func(t *testing.T) {
 		buf := []byte("---\nspdx-id: MIT\n(never closes)\n")
 		assert.Empty(t, parseLicenseFrontmatter(buf))
 	})
 }
 func TestDockerAIModelConfigMediaTypes(t *testing.T) {
@ -816,14 +897,27 @@ func TestDockerAIModelConfigMediaTypes(t *testing.T) {
 		}
 		return false
 	}
-	// the known, verified schema versions are consumed
+
-	assert.True(t, supported("application/vnd.docker.ai.model.config.v0.1+json"))
+	tests := []struct {
-	assert.True(t, supported("application/vnd.docker.ai.model.config.v0.2+json"))
+		name      string
-	// unknown/future schema versions are intentionally NOT consumed, to avoid
+		mediaType string
-	// silently ingesting a potentially breaking config change
+		want      bool
-	assert.False(t, supported("application/vnd.docker.ai.model.config.v0.3+json"))
+	}{
-	assert.False(t, supported("application/vnd.docker.ai.model.config.v9.9+json"))
+		// the known, verified schema versions are consumed
-	// sibling layer media types are not matched either
+		{name: "known schema v0.1 is consumed", mediaType: "application/vnd.docker.ai.model.config.v0.1+json", want: true},
-	assert.False(t, supported("application/vnd.docker.ai.model.file"))
+		{name: "known schema v0.2 is consumed", mediaType: "application/vnd.docker.ai.model.config.v0.2+json", want: true},
-	assert.False(t, supported("application/vnd.docker.ai.gguf.v3"))
+		// unknown/future schema versions are intentionally NOT consumed, to avoid
 		// silently ingesting a potentially breaking config change
 		{name: "unknown schema v0.3 is rejected", mediaType: "application/vnd.docker.ai.model.config.v0.3+json", want: false},
 		{name: "far-future schema v9.9 is rejected", mediaType: "application/vnd.docker.ai.model.config.v9.9+json", want: false},
 		// sibling layer media types are not matched either
 		{name: "sibling model.file layer is not matched", mediaType: "application/vnd.docker.ai.model.file", want: false},
 		{name: "sibling gguf layer is not matched", mediaType: "application/vnd.docker.ai.gguf.v3", want: false},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			assert.Equal(t, tt.want, supported(tt.mediaType))
 		})
 	}
 }
--- a/syft/pkg/cataloger/ai/processor.go
+++ b/syft/pkg/cataloger/ai/processor.go
@ -127,10 +127,10 @@ func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs
 	out := other
 	for _, key := range keys {
 		merged := mergeSafeTensorsGroup(groups[key])
-		nameOrPath, imageRefName := enrichSafeTensorsGroup(ctx, resolver, key, &merged)
+		nameOrPath, fallbackName := enrichSafeTensorsGroup(ctx, resolver, key, &merged)
-		name := pickSafeTensorsName(nameOrPath, imageRefName)
+		name := pickSafeTensorsName(nameOrPath, fallbackName)
 		if name == "" {
-			continue // drop unnameable groups, per design (no opaque fallback)
+			continue // drop groups with no name source and no usable fallback
 		}
 		merged.Name = name
 		merged.SetID()
@ -142,8 +142,6 @@ func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs
 // groupSafeTensorsPackages buckets packages by the parent directory of their
 // primary-evidence location, or the OCI sentinel when the location lives at
 // the ContainerImageModel resolver's virtual "/" path.
 // TODO: add a test where a single directory scan hits both layouts at once:
 // - safetensors files at the top level as well as in subdirectories
 func groupSafeTensorsPackages(pkgs []pkg.Package) map[string][]pkg.Package {
 	out := make(map[string][]pkg.Package)
 	for _, p := range pkgs {
@ -253,19 +251,15 @@ func mergeAggregatesInto(merged *pkg.SafeTensorsModelInfo, aggregates []pkg.Safe
 // mergeShardsInto folds the per-shard header metadata into merged, returning
 // the summed shard TensorCount and the list of non-empty per-shard hashes for
-// the rollup. Architecture / TorchDtype / TransformersVersion are accepted as
+// the rollup. Shards carry only the content-derived fields (Quantization,
-// fallbacks if a shard ever carries them (the current parsers don't, but the
+// Parameters, UserMetadata); producer-declared fields like Architecture come
-// resolver-backed enrichment runs afterwards and won't overwrite anything
+// from the resolver-backed enrichment that runs afterwards.
 // already set, so it's safe to populate them earlier).
 func mergeShardsInto(merged *pkg.SafeTensorsModelInfo, shards []pkg.SafeTensorsModelInfo) (shardTensorTotal uint64, hashes []string) {
 	seenKV := map[string]bool{}
 	for _, s := range shards {
 		shardTensorTotal += s.TensorCount
 		firstNonEmpty(&merged.Quantization, s.Quantization)
 		firstNonEmpty(&merged.Parameters, s.Parameters)
 		firstNonEmpty(&merged.Architecture, s.Architecture)
 		firstNonEmpty(&merged.TorchDtype, s.TorchDtype)
 		firstNonEmpty(&merged.TransformersVersion, s.TransformersVersion)
 		for _, kv := range s.UserMetadata {
 			if seenKV[kv.Key] {
 				continue
@ -336,10 +330,11 @@ func rollupHash(hashes []string) string {
 // merged metadata's Architecture / TorchDtype / TransformersVersion, set the
 // licenses on the merged package, and attach the location of every consulted
 // supporting file as SupportingEvidence. Returns two name candidates for the
-// merge processor: nameOrPath (raw _name_or_path from a config.json) and
+// merge processor: nameOrPath (raw _name_or_path from a config.json) and a
-// imageRefName (the last path segment of the OCI image reference, empty for
+// fallbackName used when no _name_or_path is available — the last path segment
-// dir-scan groups).
+// of the OCI image reference for OCI groups, or the parent directory's base
-func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKey string, merged *pkg.Package) (nameOrPath, imageRefName string) {
+// name for directory-scan groups.
 func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKey string, merged *pkg.Package) (nameOrPath, fallbackName string) {
 	md := merged.Metadata.(pkg.SafeTensorsModelInfo)
 	var (
@ -347,9 +342,10 @@ func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKe
 		supporting []file.Location
 	)
 	if groupKey == ociGroupKey {
-		nameOrPath, imageRefName, lics, supporting = enrichSafeTensorsOCI(ctx, resolver, &md)
+		nameOrPath, fallbackName, lics, supporting = enrichSafeTensorsOCI(ctx, resolver, &md)
 	} else {
 		nameOrPath, lics, supporting = enrichSafeTensorsDir(ctx, resolver, groupKey, &md)
 		fallbackName = safeTensorsDirName(groupKey)
 	}
 	merged.Metadata = md
@ -359,13 +355,28 @@ func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKe
 	for _, loc := range supporting {
 		merged.Locations.Add(loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
 	}
-	return nameOrPath, imageRefName
+	return nameOrPath, fallbackName
 }
-// enrichSafeTensorsDir handles the directory-scan case: look for sibling
+// safeTensorsDirName returns the directory-scan naming fallback: the base name
-// config.json and README.md next to the model files.
+// of the group's parent directory (the group key is already that directory).
 // For "/models/tiny-llama" this returns "tiny-llama". Degenerate roots that
 // carry no meaningful model name ("/", ".", "") return "", so the group is
 // dropped rather than labeled with a filesystem artifact.
 func safeTensorsDirName(groupKey string) string {
 	// path.Base reports "." for an empty path and "/" for an all-slash path;
 	// neither names a model, so both collapse to the empty "drop" signal.
 	name := path.Base(groupKey)
 	if name == "/" || name == "." || name == "" {
 		return ""
 	}
 	return name
 }
 // enrichSafeTensorsDir handles the directory-scan case: look for a config.json
 // beside the model files (walking up parent directories to the scanned source
 // root if no sibling exists) and a sibling README.md.
 func enrichSafeTensorsDir(ctx context.Context, resolver file.Resolver, dir string, md *pkg.SafeTensorsModelInfo) (nameOrPath string, lics []pkg.License, supporting []file.Location) {
-	if loc, cfg := readDirHFConfig(resolver, path.Join(dir, "config.json")); cfg != nil {
+	if loc, cfg := findDirHFConfig(resolver, dir); cfg != nil {
 		applyHFConfig(md, cfg)
 		nameOrPath = cfg.NameOrPath
 		supporting = append(supporting, *loc)
@ -518,9 +529,9 @@ func classifyOCIModelFileLayer(resolver file.Resolver, loc file.Location, md *pk
 	if err != nil {
 		return false
 	}
-	trimmed := trimLeadingWhitespace(buf)
+	trimmed := bytes.TrimLeft(buf, "\xef\xbb\xbf \t\r\n")
 	switch {
-	case hasPrefix(trimmed, "---"):
+	case bytes.HasPrefix(trimmed, []byte("---")):
 		fm := parseFrontmatter(buf)
 		if fm == nil {
 			return false
@ -532,7 +543,7 @@ func classifyOCIModelFileLayer(resolver file.Resolver, loc file.Location, md *pk
 			*readmeName = fm.BaseModel[0]
 		}
 		return true
-	case hasPrefix(trimmed, "{"):
+	case bytes.HasPrefix(trimmed, []byte("{")):
 		var cfg hfConfig
 		if err := json.Unmarshal(buf, &cfg); err != nil {
 			return false
@ -566,26 +577,29 @@ func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) {
 //
 //  1. config.json _name_or_path  (path.Base, so "org/Model" → "Model";
 //     applies to both dir-scan and OCI groups)
-//  2. OCI image-ref last segment (OCI-only; the user-supplied artifact
+//  2. fallback name — the group's source-specific positional identifier:
-//     reference's repository basename, e.g.
+//     the OCI image-ref repository basename for OCI groups (e.g.
-//     "docker.io/ai/smollm2-vllm:360M" → "smollm2-vllm")
+//     "docker.io/ai/smollm2-vllm:360M" → "smollm2-vllm"), or the parent
 //     directory base name for directory-scan groups (e.g.
 //     "/models/tiny-llama/*.safetensors" → "tiny-llama")
 //
 // Returns "" to signal the merge processor should drop the group. There is
-// intentionally no Architecture-Parameters synthetic or parent-directory
+// intentionally no Architecture-Parameters synthetic and no opaque hash label:
-// fallback: an unnameable model is recorded as absent rather than under a
+// when neither a producer-declared name nor a positional fallback is available
-// label the SBOM consumer would not recognize.
+// the model is recorded as absent rather than under a label the SBOM consumer
-func pickSafeTensorsName(nameOrPath, imageRefName string) string {
+// would not recognize.
 func pickSafeTensorsName(nameOrPath, fallbackName string) string {
 	if nameOrPath != "" {
 		return path.Base(nameOrPath)
 	}
-	return imageRefName
+	return fallbackName
 }
-// --- Relocated enrichment helpers ----------------------------------------
+// --- Enrichment helpers ---------------------------------------------------
 //
-// These types and functions used to live in the parser files; they moved here
+// The parsers decode only the safetensors-specific format; every resolver-backed
-// when the parsers shrank to "just decode the safetensors-specific format" and
+// read (config.json, README, license layers) is centralized here in the merge
-// every resolver-backed read centralized in the merge processor.
+// processor, along with the types those reads decode into.
 // hfConfig is a minimal projection of Hugging Face config.json fields.
 type hfConfig struct {
@ -601,6 +615,27 @@ type readmeFrontmatter struct {
 	BaseModel []string `yaml:"base_model"`
 }
 // findDirHFConfig looks for a config.json beside the model files, walking up
 // parent directories until it reaches the scanned source root. The walk needs
 // no explicit depth bound: the resolver only resolves paths within the scanned
 // source, so an ancestor above the scan root simply yields no config, and
 // path.Dir converges on a fixed point ("/" or ".") that terminates the loop.
 // The first config.json found wins, so the closest one — a sibling, then the
 // nearest ancestor — supplies both the producer-declared name and the HF fields
 // applied to the model.
 func findDirHFConfig(resolver file.Resolver, dir string) (*file.Location, *hfConfig) {
 	cur := dir
 	for {
 		// probe the current level first so the nearest config.json wins
 		loc, cfg := readDirHFConfig(resolver, path.Join(cur, "config.json"))
 		if cfg != nil {
 			return loc, cfg
 		}
 		// path.Dir is idempotent at "/" and "."; an unchanged result means
 		// there is no further ancestor left to probe
 		up := path.Dir(cur)
 		if up == cur {
 			break
 		}
 		cur = up
 	}
 	return nil, nil
 }
 func readDirHFConfig(resolver file.Resolver, p string) (*file.Location, *hfConfig) {
 	locations, err := resolver.FilesByPath(p)
 	if err != nil || len(locations) == 0 {
@ -715,18 +750,3 @@ func parseLicenseFrontmatter(buf []byte) string {
 	}
 	return fm.SPDXID
 }
 func hasPrefix(b []byte, s string) bool {
 	// a buffer shorter than the prefix can never start with it; checking first
 	// also keeps the slice expression below in bounds
 	if len(b) < len(s) {
 		return false
 	}
 	head := b[:len(s)]
 	return string(head) == s
 }
 // trimLeadingWhitespace returns b with every leading ASCII whitespace byte
 // (space, tab, CR, LF) and every leading UTF-8 byte-order mark (EF BB BF)
 // removed, in whatever order they appear. The result aliases b; nothing is
 // copied. A BOM sits at the very start of a file, so whitespace commonly
 // follows it rather than precedes it — both arrangements are stripped so
 // content sniffing sees the first meaningful byte.
 func trimLeadingWhitespace(b []byte) []byte {
 	i := 0
 	for i < len(b) {
 		switch {
 		case b[i] == ' ' || b[i] == '\t' || b[i] == '\r' || b[i] == '\n':
 			i++
 		case len(b)-i >= 3 && b[i] == 0xEF && b[i+1] == 0xBB && b[i+2] == 0xBF:
 			// only a complete three-byte BOM is consumed; a truncated one is
 			// treated as content
 			i += 3
 		default:
 			return b[i:]
 		}
 	}
 	return b[i:]
 }
--- a/syft/pkg/cataloger/ai/test_helpers_test.go
+++ b/syft/pkg/cataloger/ai/test_helpers_test.go
@ -52,6 +52,11 @@ func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
 	return b
 }
 // withTensorCount sets the builder's tensorCount field to count and returns
 // the same builder so calls can be chained.
 func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
 	b.tensorCount = count
 	return b
 }
 func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
 	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
 	return b
@ -114,3 +119,10 @@ func (b *testGGUFBuilder) build() []byte {
 	return b.buf.Bytes()
 }
 // buildInvalidMagic returns a 4-byte payload holding the little-endian uint32
 // 0x12345678 and nothing else; per its name, that value stands in for an
 // invalid magic number. The receiver's state is not consulted.
 func (b *testGGUFBuilder) buildInvalidMagic() []byte {
 	// encode straight into a fixed-size slice: unlike binary.Write this cannot
 	// fail, so there is no error return to discard
 	magic := make([]byte, 4)
 	binary.LittleEndian.PutUint32(magic, 0x12345678)
 	return magic
 }
--- a/syft/pkg/safetensors.go
+++ b/syft/pkg/safetensors.go
@ -3,13 +3,14 @@ package pkg
 // SafeTensorsModelInfo holds the model details extracted from SafeTensors content.
 // SafeTensors is a simple, safe serialization format for storing tensors, used
 // as the default weight format for Hugging Face transformer models. Syft may
-// populate this struct from three sources:
+// populate this struct from these sources:
 //   - a single .safetensors file (header-only parse)
-//   - a sharded model described by model.safetensors.index.json
+//   - the per-shard headers of a multi-shard model, merged into one package
-//   - a Docker AI OCI model artifact config blob (vnd.docker.ai.model.config.v0.1+json)
+//   - a Docker AI OCI model artifact: the config blob
 //     (vnd.docker.ai.model.config.v0.1+json) plus each weight layer's header
 //
-// The Model Name, License, and Version fields have all been lifted up to be on
+// Model name, license, and version live on the enclosing syft Package rather
-// the syft Package.
+// than in this struct.
 type SafeTensorsModelInfo struct {
 	// Format is the source format label (always "safetensors" for this metadata type).
 	// Present because the Docker AI model config blob carries an explicit format field