Compare commits


No commits in common. "f1839215c64884d317f660fb5c271697ddb05c0f" and "9609ce2b366803548de6d75e93cce0a2e7eff447" have entirely different histories.

11 changed files with 235 additions and 180 deletions

View File

@@ -124,7 +124,7 @@ var jsonTypes = makeJSONTypes(
 	jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
 	jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
 	jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
-	jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
+	jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
 )

 func expandLegacyNameVariants(names ...string) []string {

View File

@@ -1433,16 +1433,24 @@
       ],
       "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
     },
-    "GgufFileHeader": {
+    "GgufFileMetadata": {
       "properties": {
         "ggufVersion": {
          "type": "integer",
          "description": "GGUFVersion is the GGUF format version (e.g., 3)"
        },
+        "modelName": {
+          "type": "string",
+          "description": "ModelName is the name of the model (from general.name or filename)"
+        },
        "fileSize": {
          "type": "integer",
          "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
        },
+        "license": {
+          "type": "string",
+          "description": "License is the license identifier (from general.license if present)"
+        },
        "architecture": {
          "type": "string",
          "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@@ -1461,16 +1469,17 @@
        },
        "header": {
          "type": "object",
-          "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
+          "description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
        },
        "metadataHash": {
          "type": "string",
-          "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
+          "description": "MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
        }
      },
      "type": "object",
      "required": [
        "ggufVersion",
+        "modelName",
        "tensorCount"
      ],
      "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@@ -2622,7 +2631,7 @@
          "$ref": "#/$defs/ErlangRebarLockEntry"
        },
        {
-          "$ref": "#/$defs/GgufFileHeader"
+          "$ref": "#/$defs/GgufFileMetadata"
        },
        {
          "$ref": "#/$defs/GithubActionsUseStatement"

View File

@@ -1433,16 +1433,24 @@
       ],
       "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
     },
-    "GgufFileHeader": {
+    "GgufFileMetadata": {
       "properties": {
         "ggufVersion": {
          "type": "integer",
          "description": "GGUFVersion is the GGUF format version (e.g., 3)"
        },
+        "modelName": {
+          "type": "string",
+          "description": "ModelName is the name of the model (from general.name or filename)"
+        },
        "fileSize": {
          "type": "integer",
          "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
        },
+        "license": {
+          "type": "string",
+          "description": "License is the license identifier (from general.license if present)"
+        },
        "architecture": {
          "type": "string",
          "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@@ -1461,16 +1469,17 @@
        },
        "header": {
          "type": "object",
-          "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
+          "description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
        },
        "metadataHash": {
          "type": "string",
-          "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
+          "description": "MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
        }
      },
      "type": "object",
      "required": [
        "ggufVersion",
+        "modelName",
        "tensorCount"
      ],
      "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@@ -2622,7 +2631,7 @@
          "$ref": "#/$defs/ErlangRebarLockEntry"
        },
        {
-          "$ref": "#/$defs/GgufFileHeader"
+          "$ref": "#/$defs/GgufFileMetadata"
        },
        {
          "$ref": "#/$defs/GithubActionsUseStatement"

View File

@@ -5,6 +5,8 @@ import (
     "path/filepath"
     "testing"

+    "github.com/google/go-cmp/cmp/cmpopts"
+
     "github.com/anchore/syft/syft/artifact"
     "github.com/anchore/syft/syft/pkg"
     "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@@ -35,7 +37,7 @@ func TestGGUFCataloger_Globs(t *testing.T) {
     }
 }

-func TestGGUFCataloger(t *testing.T) {
+func TestGGUFCataloger_Integration(t *testing.T) {
     tests := []struct {
         name                  string
         setup                 func(t *testing.T) string
@@ -54,7 +56,6 @@ func TestGGUFCataloger(t *testing.T) {
                 withStringKV("general.license", "Apache-2.0").
                 withStringKV("general.quantization", "Q4_K_M").
                 withUint64KV("general.parameter_count", 8030000000).
-                withStringKV("general.some_random_kv", "foobar").
                 build()

             path := filepath.Join(dir, "llama3-8b.gguf")
@@ -70,53 +71,14 @@ func TestGGUFCataloger(t *testing.T) {
                         pkg.NewLicenseFromFields("Apache-2.0", "", nil),
                     ),
                     Metadata: pkg.GGUFFileHeader{
-                        Architecture:          "llama",
-                        Quantization:          "Unknown",
-                        Parameters:            0,
-                        GGUFVersion:           3,
-                        TensorCount:           0,
-                        MetadataKeyValuesHash: "6e3d368066455ce4",
-                        RemainingKeyValues: map[string]interface{}{
-                            "general.some_random_kv": "foobar",
-                        },
-                    },
-                },
-            },
-            expectedRelationships: nil,
-        },
-        {
-            name: "catalog GGUF file with minimal metadata",
-            setup: func(t *testing.T) string {
-                dir := t.TempDir()
-                data := newTestGGUFBuilder().
-                    withVersion(3).
-                    withStringKV("general.architecture", "gpt2").
-                    withStringKV("general.name", "gpt2-small").
-                    withStringKV("gpt2.context_length", "1024").
-                    withUint32KV("gpt2.embedding_length", 768).
-                    build()
-
-                path := filepath.Join(dir, "gpt2-small.gguf")
-                os.WriteFile(path, data, 0644)
-                return dir
-            },
-            expectedPackages: []pkg.Package{
-                {
-                    Name:     "gpt2-small",
-                    Version:  "",
-                    Type:     pkg.ModelPkg,
-                    Licenses: pkg.NewLicenseSet(),
-                    Metadata: pkg.GGUFFileHeader{
-                        Architecture:          "gpt2",
-                        Quantization:          "Unknown",
-                        Parameters:            0,
-                        GGUFVersion:           3,
-                        TensorCount:           0,
-                        MetadataKeyValuesHash: "9dc6f23591062a27",
-                        RemainingKeyValues: map[string]interface{}{
-                            "gpt2.context_length":   "1024",
-                            "gpt2.embedding_length": uint32(768),
-                        },
+                        ModelName:    "llama3-8b",
+                        License:      "Apache-2.0",
+                        Architecture: "llama",
+                        Quantization: "Unknown",
+                        Parameters:   0,
+                        GGUFVersion:  3,
+                        TensorCount:  0,
+                        Header:       map[string]interface{}{},
                     },
                 },
             },
@@ -129,12 +91,17 @@ func TestGGUFCataloger(t *testing.T) {
             fixtureDir := tt.setup(t)

             // Use pkgtest to catalog and compare
-            pkgtest.NewCatalogTester().
+            tester := pkgtest.NewCatalogTester().
                 FromDirectory(t, fixtureDir).
                 Expects(tt.expectedPackages, tt.expectedRelationships).
                 IgnoreLocationLayer().
-                IgnorePackageFields("FoundBy", "Locations").
-                TestCataloger(t, NewGGUFCataloger())
+                IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
+                WithCompareOptions(
+                    // Ignore MetadataHash as it's computed dynamically
+                    cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
+                )
+
+            tester.TestCataloger(t, NewGGUFCataloger())
         })
     }
 }

View File

@@ -5,17 +5,23 @@ import (
     "github.com/anchore/syft/syft/pkg"
 )

-func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
+func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
     p := pkg.Package{
-        Name:      modelName,
+        Name:      metadata.ModelName,
         Version:   version,
         Locations: file.NewLocationSet(locations...),
         Type:      pkg.ModelPkg,
-        Licenses:  pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
+        Licenses:  pkg.NewLicenseSet(),
         Metadata:  *metadata,
         // NOTE: PURL is intentionally not set as the package-url spec
         // has not yet finalized support for ML model packages
     }

+    // Add license to the package if present in metadata
+    if metadata.License != "" {
+        p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
+    }
+
     p.SetID()
     return p
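For orientation, here is a minimal sketch (not part of the diff) of how the reworked constructor might be called after this change. It assumes it lives inside the same ai cataloger package (newGGUFPackage is unexported); the function name examplePackage and the metadata values are illustrative only.

package ai

import (
    "fmt"

    "github.com/anchore/syft/syft/file"
    "github.com/anchore/syft/syft/pkg"
)

// examplePackage builds a package from already-parsed header metadata.
// Name now comes from metadata.ModelName, and a non-empty metadata.License
// is added to p.Licenses by newGGUFPackage itself.
func examplePackage() pkg.Package {
    meta := &pkg.GGUFFileHeader{
        ModelName:    "example-model", // illustrative value
        License:      "Apache-2.0",
        Architecture: "llama",
        GGUFVersion:  3,
    }
    p := newGGUFPackage(meta, "1.0", file.NewLocation("/models/example.gguf"))
    fmt.Println(p.Name, p.Licenses.ToSlice())
    return p
}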

View File

@@ -3,119 +3,121 @@ package ai
 import (
     "testing"

+    "github.com/google/go-cmp/cmp"
+    "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"

     "github.com/anchore/syft/syft/file"
     "github.com/anchore/syft/syft/pkg"
-    "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
 )

 func TestNewGGUFPackage(t *testing.T) {
     tests := []struct {
         name     string
         metadata *pkg.GGUFFileHeader
-        input    struct {
-            modelName string
-            version   string
-            license   string
-            locations []file.Location
-        }
-        expected pkg.Package
+        version   string
+        locations []file.Location
+        checkFunc func(t *testing.T, p pkg.Package)
     }{
         {
             name: "complete GGUF package with all fields",
-            input: struct {
-                modelName string
-                version   string
-                license   string
-                locations []file.Location
-            }{
-                modelName: "llama3-8b",
-                version:   "3.0",
-                license:   "Apache-2.0",
-                locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
-            },
+            version: "3.0",
             metadata: &pkg.GGUFFileHeader{
+                ModelName:    "llama3-8b-instruct",
+                License:      "Apache-2.0",
                 Architecture: "llama",
                 Quantization: "Q4_K_M",
                 Parameters:   8030000000,
                 GGUFVersion:  3,
                 TensorCount:  291,
-                RemainingKeyValues: map[string]any{
-                    "general.random_kv": "foobar",
-                },
+                Header:       map[string]any{},
             },
-            expected: pkg.Package{
-                Name:    "llama3-8b",
-                Version: "3.0",
-                Type:    pkg.ModelPkg,
-                Licenses: pkg.NewLicenseSet(
-                    pkg.NewLicenseFromFields("Apache-2.0", "", nil),
-                ),
-                Metadata: pkg.GGUFFileHeader{
-                    Architecture: "llama",
-                    Quantization: "Q4_K_M",
-                    Parameters:   8030000000,
-                    GGUFVersion:  3,
-                    TensorCount:  291,
-                    RemainingKeyValues: map[string]any{
-                        "general.random_kv": "foobar",
-                    },
-                },
-                Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
+            locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
+            checkFunc: func(t *testing.T, p pkg.Package) {
+                if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
+                    t.Errorf("Name mismatch (-want +got):\n%s", d)
+                }
+                if d := cmp.Diff("3.0", p.Version); d != "" {
+                    t.Errorf("Version mismatch (-want +got):\n%s", d)
+                }
+                if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
+                    t.Errorf("Type mismatch (-want +got):\n%s", d)
+                }
+                assert.Empty(t, p.PURL, "PURL should not be set for model packages")
+                assert.Len(t, p.Licenses.ToSlice(), 1)
+                if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
+                    t.Errorf("License value mismatch (-want +got):\n%s", d)
+                }
+                assert.NotEmpty(t, p.ID())
             },
         },
         {
             name: "minimal GGUF package",
-            input: struct {
-                modelName string
-                version   string
-                license   string
-                locations []file.Location
-            }{
-                modelName: "gpt2-small",
-                version:   "1.0",
-                license:   "MIT",
-                locations: []file.Location{file.NewLocation("/models/simple.gguf")},
-            },
+            version: "1.0",
             metadata: &pkg.GGUFFileHeader{
+                ModelName:    "simple-model",
                 Architecture: "gpt2",
                 GGUFVersion:  3,
                 TensorCount:  50,
             },
-            expected: pkg.Package{
-                Name:    "gpt2-small",
-                Version: "1.0",
-                Type:    pkg.ModelPkg,
-                Licenses: pkg.NewLicenseSet(
-                    pkg.NewLicenseFromFields("MIT", "", nil),
-                ),
-                Metadata: pkg.GGUFFileHeader{
-                    Architecture: "gpt2",
-                    GGUFVersion:  3,
-                    TensorCount:  50,
-                },
-                Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
+            locations: []file.Location{file.NewLocation("/models/simple.gguf")},
+            checkFunc: func(t *testing.T, p pkg.Package) {
+                if d := cmp.Diff("simple-model", p.Name); d != "" {
+                    t.Errorf("Name mismatch (-want +got):\n%s", d)
+                }
+                if d := cmp.Diff("1.0", p.Version); d != "" {
+                    t.Errorf("Version mismatch (-want +got):\n%s", d)
+                }
+                if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
+                    t.Errorf("Type mismatch (-want +got):\n%s", d)
+                }
+                assert.Empty(t, p.PURL, "PURL should not be set for model packages")
+                assert.Empty(t, p.Licenses.ToSlice())
+            },
+        },
+        {
+            name:    "GGUF package with multiple locations",
+            version: "1.5",
+            metadata: &pkg.GGUFFileHeader{
+                ModelName:    "multi-location-model",
+                Architecture: "llama",
+                GGUFVersion:  3,
+                TensorCount:  150,
+            },
+            locations: []file.Location{
+                file.NewLocation("/models/model1.gguf"),
+                file.NewLocation("/models/model2.gguf"),
+            },
+            checkFunc: func(t *testing.T, p pkg.Package) {
+                assert.Len(t, p.Locations.ToSlice(), 2)
             },
         },
     }

     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
-            actual := newGGUFPackage(
-                tt.metadata,
-                tt.input.modelName,
-                tt.input.version,
-                tt.input.license,
-                tt.input.locations...,
-            )
+            p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)

-            // Verify metadata type
-            _, ok := actual.Metadata.(pkg.GGUFFileHeader)
+            if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
+                t.Errorf("Name mismatch (-want +got):\n%s", d)
+            }
+            if d := cmp.Diff(tt.version, p.Version); d != "" {
+                t.Errorf("Version mismatch (-want +got):\n%s", d)
+            }
+            if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
+                t.Errorf("Type mismatch (-want +got):\n%s", d)
+            }
+
+            // Verify metadata is attached
+            metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
             require.True(t, ok, "metadata should be GGUFFileHeader")
+            if d := cmp.Diff(*tt.metadata, metadata); d != "" {
+                t.Errorf("Metadata mismatch (-want +got):\n%s", d)
+            }

-            // Use AssertPackagesEqual for comprehensive comparison
-            pkgtest.AssertPackagesEqual(t, tt.expected, actual)
+            if tt.checkFunc != nil {
+                tt.checkFunc(t, p)
+            }
         })
     }
 }

View File

@@ -14,35 +14,46 @@ const (
     maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
 )

-// copyHeader copies the GGUF header from the reader to the writer.
-// It validates the magic number first, then copies the rest of the data.
-// The reader should be wrapped with io.LimitedReader to prevent OOM issues.
-func copyHeader(w io.Writer, r io.Reader) error {
-    // Read initial chunk to validate magic number
+// readHeader reads only the GGUF header (metadata) without reading tensor data
+// This is much more efficient than reading the entire file
+// The reader should be wrapped with io.LimitedReader to prevent OOM issues
+func readHeader(r io.Reader) ([]byte, error) {
+    // Read initial chunk to determine header size
     // GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
     initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
     if _, err := io.ReadFull(r, initialBuf); err != nil {
-        return fmt.Errorf("failed to read GGUF header prefix: %w", err)
+        return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
     }

     // Verify magic number
     magic := binary.LittleEndian.Uint32(initialBuf[0:4])
     if magic != ggufMagicNumber {
-        return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
+        return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
     }

-    // Write the initial buffer to the writer
-    if _, err := w.Write(initialBuf); err != nil {
-        return fmt.Errorf("failed to write GGUF header prefix: %w", err)
-    }
+    // We need to read the metadata KV pairs to know the full header size
+    // The io.LimitedReader wrapping this reader ensures we don't read more than maxHeaderSize
+    headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
+    headerData = append(headerData, initialBuf...)

-    // Copy the rest of the header from reader to writer
+    // Read the rest of the header in larger chunks for efficiency
     // The LimitedReader will return EOF once maxHeaderSize is reached
-    if _, err := io.Copy(w, r); err != nil {
-        return fmt.Errorf("failed to copy GGUF header: %w", err)
+    buf := make([]byte, 64*1024) // 64KB chunks
+    for {
+        n, err := r.Read(buf)
+        if n > 0 {
+            headerData = append(headerData, buf[:n]...)
+        }
+        if err == io.EOF {
+            // Reached end of file or limit, we have all available data
+            break
+        }
+        if err != nil {
+            return nil, fmt.Errorf("failed to read GGUF header: %w", err)
+        }
     }

-    return nil
+    return headerData, nil
 }

 // Helper to convert gguf_parser metadata to simpler types
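The same bounded-read pattern can be seen in isolation below. This is a standalone sketch using only the standard library (readBounded and the values are illustrative, not from the PR); it mirrors how readHeader drains a reader capped by io.LimitedReader so memory use is bounded by the limit rather than the file size.

package main

import (
    "bytes"
    "fmt"
    "io"
)

// readBounded drains a reader through io.LimitedReader in 64KB chunks,
// so at most `limit` bytes are ever buffered, as in readHeader above.
func readBounded(r io.Reader, limit int64) ([]byte, error) {
    lr := &io.LimitedReader{R: r, N: limit}
    out := make([]byte, 0, 1024)
    buf := make([]byte, 64*1024)
    for {
        n, err := lr.Read(buf)
        if n > 0 {
            out = append(out, buf[:n]...)
        }
        if err == io.EOF {
            return out, nil // source exhausted or limit reached
        }
        if err != nil {
            return nil, fmt.Errorf("read failed: %w", err)
        }
    }
}

func main() {
    data, _ := readBounded(bytes.NewReader(bytes.Repeat([]byte{0xAB}, 1000)), 256)
    fmt.Println(len(data)) // prints 256: the limit bounds the read
}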

View File

@@ -27,6 +27,14 @@ import (
 func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
     defer internal.CloseAndLogError(reader, reader.Path())

+    // Read and validate the GGUF file header using LimitedReader to prevent OOM
+    // We use LimitedReader to cap reads at maxHeaderSize (50MB)
+    limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
+    headerData, err := readHeader(limitedReader)
+    if err != nil {
+        return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
+    }
+
     // Create a temporary file for the library to parse
     // The library requires a file path, so we create a temp file
     tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
@@ -36,12 +44,10 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
     tempPath := tempFile.Name()
     defer os.Remove(tempPath)

-    // Copy and validate the GGUF file header using LimitedReader to prevent OOM
-    // We use LimitedReader to cap reads at maxHeaderSize (50MB)
-    limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
-    if err := copyHeader(tempFile, limitedReader); err != nil {
+    // Write the validated header data to temp file
+    if _, err := tempFile.Write(headerData); err != nil {
         tempFile.Close()
-        return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
+        return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
     }

     tempFile.Close()
@@ -61,26 +67,26 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,

     // Convert to syft metadata structure
     syftMetadata := &pkg.GGUFFileHeader{
-        Architecture:          metadata.Architecture,
-        Quantization:          metadata.FileTypeDescriptor,
-        Parameters:            uint64(metadata.Parameters),
-        GGUFVersion:           uint32(ggufFile.Header.Version),
-        TensorCount:           ggufFile.Header.TensorCount,
-        RemainingKeyValues:    convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
-        MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
+        ModelName:    metadata.Name,
+        License:      metadata.License,
+        Architecture: metadata.Architecture,
+        Quantization: metadata.FileTypeDescriptor,
+        Parameters:   uint64(metadata.Parameters),
+        GGUFVersion:  uint32(ggufFile.Header.Version),
+        TensorCount:  ggufFile.Header.TensorCount,
+        Header:       convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
+        MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
     }

     // If model name is not in metadata, use filename
-    if metadata.Name == "" {
-        metadata.Name = extractModelNameFromPath(reader.Path())
+    if syftMetadata.ModelName == "" {
+        syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
     }

     // Create package from metadata
     p := newGGUFPackage(
         syftMetadata,
-        metadata.Name,
         modelVersion,
-        metadata.License,
         reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
     )
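Since model name, license, and the remaining header fields now live on the metadata struct, a downstream consumer would read them back off the cataloged package via a type assertion, as the tests above do. The sketch below (not part of the diff; printGGUFInfo is an illustrative name, only the pkg.GGUFFileHeader type and its fields come from this change) shows that access pattern.

package main

import (
    "fmt"

    "github.com/anchore/syft/syft/pkg"
)

// printGGUFInfo inspects a cataloged package and, if it carries GGUF metadata,
// prints the renamed fields (ModelName, Header, MetadataHash).
func printGGUFInfo(p pkg.Package) {
    meta, ok := p.Metadata.(pkg.GGUFFileHeader)
    if !ok {
        return // not a GGUF model package
    }
    fmt.Println(meta.ModelName, meta.Architecture, meta.MetadataHash)
    for k, v := range meta.Header {
        fmt.Printf("  %s=%v\n", k, v)
    }
}

func main() {} // placeholder entry point so the sketch compiles standalone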

View File

@@ -0,0 +1,41 @@
+package ai
+
+import (
+	"fmt"
+	"os"
+
+	gguf_parser "github.com/gpustack/gguf-parser-go"
+)
+
+func main() {
+	// Create a test GGUF file
+	data := newTestGGUFBuilder().
+		withVersion(3).
+		withStringKV("general.architecture", "llama").
+		withStringKV("general.name", "test-model").
+		build()
+
+	// Write to temp file
+	tempFile, err := os.CreateTemp("", "test-*.gguf")
+	if err != nil {
+		panic(err)
+	}
+	defer os.Remove(tempFile.Name())
+
+	if _, err := tempFile.Write(data); err != nil {
+		panic(err)
+	}
+	tempFile.Close()
+
+	fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
+
+	// Try to parse it
+	fmt.Println("Attempting to parse...")
+	gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
+	if err != nil {
+		fmt.Printf("Parse error: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
+}

View File

@@ -6,7 +6,6 @@ import (
 )

 // GGUF type constants for test builder
-// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
 const (
     ggufMagic     = 0x46554747 // "GGUF" in little-endian
     ggufTypeUint8 = 0

View File

@@ -3,14 +3,19 @@ package pkg
 // GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
 // GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
 // loading and saving of models, particularly quantized large language models.
-// The Model Name, License, and Version fields have all been lifted up to be on the syft Package.
 type GGUFFileHeader struct {
     // GGUFVersion is the GGUF format version (e.g., 3)
     GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

+    // ModelName is the name of the model (from general.name or filename)
+    ModelName string `json:"modelName" cyclonedx:"modelName"`
+
     // FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
     FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

+    // License is the license identifier (from general.license if present)
+    License string `json:"license,omitempty" cyclonedx:"license"`
+
     // Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
     Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
@@ -23,15 +28,15 @@ type GGUFFileHeader struct {
     // TensorCount is the number of tensors in the model
     TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

-    // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
+    // Header contains the remaining key-value pairs from the GGUF header that are not already
     // represented as typed fields above. This preserves additional metadata fields for reference
     // (namespaced with general.*, llama.*, etc.) while avoiding duplication.
-    RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
+    Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

-    // MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
+    // MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
     // This hash is computed over the complete header metadata (including the fields extracted
     // into typed fields above) and provides a stable identifier for the model configuration
     // across different file locations or remotes. It allows matching identical models even
     // when stored in different repositories or with different filenames.
-    MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
+    MetadataHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
 }
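To tie the struct rename back to the schema diff above, the sketch below marshals the renamed struct with encoding/json; assuming the type has no custom marshaller, the output keys follow the json tags shown above (ggufVersion, modelName, header, metadataHash, ...). All field values here are illustrative placeholders, not taken from any fixture.

package main

import (
    "encoding/json"
    "fmt"

    "github.com/anchore/syft/syft/pkg"
)

func main() {
    meta := pkg.GGUFFileHeader{
        GGUFVersion:  3,
        ModelName:    "example-model", // illustrative value
        Architecture: "llama",
        TensorCount:  291,
        Header:       map[string]interface{}{"general.quantization": "Q4_K_M"},
        MetadataHash: "0000000000000000", // placeholder; the real value is an xx64 hash
    }
    out, _ := json.MarshalIndent(meta, "", "  ")
    fmt.Println(string(out))
}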