fix: pr comments

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
Christopher Phillips 2025-11-12 23:56:18 -05:00
parent 9b31c0480f
commit 6daea43c32
9 changed files with 56 additions and 349 deletions

go.mod
View File

@ -286,7 +286,10 @@ require (
modernc.org/memory v1.11.0 // indirect
)
require github.com/gpustack/gguf-parser-go v0.22.1
require (
github.com/cespare/xxhash/v2 v2.3.0
github.com/gpustack/gguf-parser-go v0.22.1
)
require (
cyphar.com/go-pathrs v0.2.1 // indirect

go.sum
View File

@ -229,7 +229,6 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=

View File

@ -27,7 +27,6 @@ func AllTypes() []any {
pkg.ELFBinaryPackageNoteJSONPayload{},
pkg.ElixirMixLockEntry{},
pkg.ErlangRebarLockEntry{},
pkg.GGUFFileHeader{},
pkg.GitHubActionsUseStatement{},
pkg.GolangBinaryBuildinfoEntry{},
pkg.GolangModuleEntry{},

View File

@ -7,7 +7,6 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
@ -15,72 +14,6 @@ import (
)
func TestGGUFCataloger_Globs(t *testing.T) {
tests := []struct {
name string
setup func(t *testing.T) string // returns fixture directory
expected []string
}{
{
name: "finds GGUF files in root",
setup: func(t *testing.T) string {
dir := t.TempDir()
createTestGGUFInDir(t, dir, "model1.gguf")
createTestGGUFInDir(t, dir, "model2.gguf")
return dir
},
expected: []string{
"model1.gguf",
"model2.gguf",
},
},
{
name: "finds GGUF files in subdirectories",
setup: func(t *testing.T) string {
dir := t.TempDir()
modelsDir := filepath.Join(dir, "models")
os.MkdirAll(modelsDir, 0755)
createTestGGUFInDir(t, modelsDir, "llama.gguf")
deepDir := filepath.Join(dir, "deep", "nested", "path")
os.MkdirAll(deepDir, 0755)
createTestGGUFInDir(t, deepDir, "mistral.gguf")
return dir
},
expected: []string{
"models/llama.gguf",
"deep/nested/path/mistral.gguf",
},
},
{
name: "ignores non-GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
createTestGGUFInDir(t, dir, "model.gguf")
// Create non-GGUF files
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
return dir
},
expected: []string{
"model.gguf",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
fixtureDir := tt.setup(t)
pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir).
ExpectsResolverContentQueries(tt.expected).
TestCataloger(t, NewGGUFCataloger())
})
}
}
func TestGGUFCataloger_Integration(t *testing.T) {
@ -117,114 +50,15 @@ func TestGGUFCataloger_Integration(t *testing.T) {
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "llama3-8b",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
},
expectedRelationships: nil,
},
{
name: "catalog multiple GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
// Create first model
data1 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "model1").
withStringKV("general.version", "1.0").
build()
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
// Create second model
data2 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "mistral").
withStringKV("general.name", "model2").
withStringKV("general.version", "2.0").
build()
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "model1",
Version: "1.0",
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "model1",
ModelVersion: "1.0",
Architecture: "llama",
Quantization: "Unknown",
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
{
Name: "model2",
Version: "2.0",
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "model2",
ModelVersion: "2.0",
Architecture: "mistral",
Quantization: "Unknown",
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
TruncatedHeader: false,
},
},
},
expectedRelationships: nil,
},
{
name: "catalog GGUF in nested directories",
setup: func(t *testing.T) string {
dir := t.TempDir()
nestedDir := filepath.Join(dir, "models", "quantized")
os.MkdirAll(nestedDir, 0755)
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "qwen").
withStringKV("general.name", "qwen-nested").
build()
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "qwen-nested",
Version: unknownGGUFData,
Type: pkg.ModelPkg,
Metadata: pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "qwen-nested",
ModelVersion: unknownGGUFData,
Architecture: "qwen",
Quantization: "Unknown",
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
TruncatedHeader: false,
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
},
},
},
@ -252,122 +86,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
}
}
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
dir := t.TempDir()
// Create a valid GGUF
validData := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "valid-model").
build()
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
// Create an invalid GGUF (wrong magic)
invalidData := newTestGGUFBuilder().buildInvalidMagic()
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
// Create a truncated GGUF
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
// Catalog should succeed and only return the valid package
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Should only find the valid model
require.Len(t, pkgs, 1)
assert.Equal(t, "valid-model", pkgs[0].Name)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_Name(t *testing.T) {
cataloger := NewGGUFCataloger()
assert.Equal(t, "gguf-cataloger", cataloger.Name())
}
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
dir := t.TempDir()
// Create a subdirectory to ensure glob still runs
os.MkdirAll(filepath.Join(dir, "models"), 0755)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
assert.Empty(t, pkgs)
assert.Empty(t, rels)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_MixedFiles(t *testing.T) {
dir := t.TempDir()
// Create GGUF file
ggufData := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
// Create other file types
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Should only find the GGUF model
require.Len(t, pkgs, 1)
assert.Equal(t, "test-model", pkgs[0].Name)
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
})
tester.TestCataloger(t, NewGGUFCataloger())
}
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
dir := t.TempDir()
// Create lowercase .gguf
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "lowercase").
build()
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
// Create uppercase .GGUF (should not match the glob)
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
tester := pkgtest.NewCatalogTester().
FromDirectory(t, dir).
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
// On case-insensitive filesystems (macOS), both might match
// On case-sensitive filesystems (Linux), only lowercase matches
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
})
tester.TestCataloger(t, NewGGUFCataloger())
}
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
func createTestGGUFInDir(t *testing.T, dir, filename string) {
t.Helper()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
path := filepath.Join(dir, filename)
err := os.WriteFile(path, data, 0644)
require.NoError(t, err)
}

View File

@ -1,10 +1,11 @@
package ai
import (
"crypto/sha256"
"encoding/json"
"fmt"
"github.com/cespare/xxhash/v2"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -48,7 +49,6 @@ func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
GGUFVersion uint32
TensorCount uint64
}{
Format: metadata.ModelFormat,
Name: metadata.ModelName,
Version: metadata.ModelVersion,
Architecture: metadata.Architecture,
@ -63,7 +63,7 @@ func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
return ""
}
// Compute SHA256 hash
hash := sha256.Sum256(jsonBytes)
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
// Compute xxhash
hash := xxhash.Sum64(jsonBytes)
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}
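For reference, a minimal sketch (not part of the diff) of what the new hashing path produces: xxhash.Sum64 yields a 64-bit digest, and the %016x verb always renders it as 16 zero-padded hex characters, the same width the old 8-byte SHA-256 prefix produced.
package main
import (
	"fmt"
	"github.com/cespare/xxhash/v2"
)
func main() {
	// Illustrative only: jsonBytes stands in for the marshaled metadata struct above.
	jsonBytes := []byte(`{"Name":"llama3-8b","Architecture":"llama"}`)
	fmt.Printf("%016x\n", xxhash.Sum64(jsonBytes)) // always 16 hex chars
}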

View File

@ -21,17 +21,15 @@ func TestNewGGUFPackage(t *testing.T) {
{
name: "complete GGUF package with all fields",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "llama3-8b-instruct",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{},
TruncatedHeader: false,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{},
},
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
checkFunc: func(t *testing.T, p pkg.Package) {
@ -55,7 +53,6 @@ func TestNewGGUFPackage(t *testing.T) {
{
name: "minimal GGUF package",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "simple-model",
ModelVersion: "1.0",
Architecture: "gpt2",
@ -80,7 +77,6 @@ func TestNewGGUFPackage(t *testing.T) {
{
name: "GGUF package with multiple locations",
metadata: &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: "multi-location-model",
ModelVersion: "1.5",
Architecture: "llama",

View File

@ -14,19 +14,14 @@ const (
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
)
// ggufHeaderReader reads just the header portion of a GGUF file efficiently
type ggufHeaderReader struct {
reader io.Reader
}
// readHeader reads only the GGUF header (metadata) without reading tensor data
// This is much more efficient than reading the entire file
// The reader should be wrapped with io.LimitedReader to prevent OOM issues
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
func readHeader(r io.Reader) ([]byte, error) {
// Read initial chunk to determine header size
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r.reader, initialBuf); err != nil {
if _, err := io.ReadFull(r, initialBuf); err != nil {
return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
}
@ -45,7 +40,7 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
// The LimitedReader will return EOF once maxHeaderSize is reached
buf := make([]byte, 64*1024) // 64KB chunks
for {
n, err := r.reader.Read(buf)
n, err := r.Read(buf)
if n > 0 {
headerData = append(headerData, buf[:n]...)
}
@ -65,24 +60,14 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
result := make(map[string]interface{})
// Limit KV pairs to avoid bloat
const maxKVPairs = 200
count := 0
for _, kv := range kvs {
if count >= maxKVPairs {
break
}
// Skip standard fields that are extracted separately
switch kv.Key {
case "general.architecture", "general.name", "general.license",
"general.version", "general.parameter_count", "general.quantization":
continue
}
result[kv.Key] = kv.Value
count++
}
return result
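To make the effect of the simplified conversion concrete, a hypothetical example (not from the commit; it assumes gguf_parser.GGUFMetadataKV exposes Key and Value as used above): standard fields stay out of the Header map, and everything else is carried through with no pair-count cap.
// Hypothetical usage, not part of this commit.
kvs := gguf_parser.GGUFMetadataKVs{
	{Key: "general.name", Value: "llama3-8b"},          // extracted into ModelName, skipped here
	{Key: "general.architecture", Value: "llama"},      // extracted into Architecture, skipped here
	{Key: "llama.context_length", Value: uint32(8192)}, // retained in the Header map
}
header := convertGGUFMetadataKVs(kvs)
// header == map[string]interface{}{"llama.context_length": uint32(8192)}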

View File

@ -2,15 +2,19 @@ package ai
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"github.com/cespare/xxhash/v2"
gguf_parser "github.com/gpustack/gguf-parser-go"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
@ -18,8 +22,6 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
const unknownGGUFData = "unknown"
// parseGGUFModel parses a GGUF model file and returns the discovered package.
// This implementation only reads the header portion of the file, not the entire model.
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
@ -28,8 +30,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Read and validate the GGUF file header using LimitedReader to prevent OOM
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
headerReader := &ggufHeaderReader{reader: limitedReader}
headerData, err := headerReader.readHeader()
headerData, err := readHeader(limitedReader)
if err != nil {
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
@ -63,7 +64,6 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{
ModelFormat: "gguf",
ModelName: metadata.Name,
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
License: metadata.License,
@ -71,10 +71,9 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
TruncatedHeader: false, // We read the full header
Hash: "", // Will be computed in newGGUFPackage
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
}
// If model name is not in metadata, use filename
@ -82,11 +81,6 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
}
// If version is still unknown, try to infer from name
if syftMetadata.ModelVersion == unknownGGUFData {
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
}
// Create package from metadata
p := newGGUFPackage(
syftMetadata,
@ -96,6 +90,27 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
}
// computeKVMetadataHash computes a stable hash of the KV metadata for use as a global identifier
func computeKVMetadataHash(metadata gguf_parser.GGUFMetadataKVs) string {
// Sort the KV pairs by key for stable hashing
sortedKVs := make([]gguf_parser.GGUFMetadataKV, len(metadata))
copy(sortedKVs, metadata)
sort.Slice(sortedKVs, func(i, j int) bool {
return sortedKVs[i].Key < sortedKVs[j].Key
})
// Marshal sorted KVs to JSON for stable hashing
jsonBytes, err := json.Marshal(sortedKVs)
if err != nil {
log.Debugf("failed to marshal metadata for hashing: %v", err)
return ""
}
// Compute xxhash
hash := xxhash.Sum64(jsonBytes)
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
}
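As a quick illustration of why the pairs are sorted before marshaling (a hypothetical snippet, not part of the commit; it assumes GGUFMetadataKV is constructible with Key and Value as used above), the identifier should be independent of the on-disk KV order:
// Hypothetical check, not part of this commit.
a := gguf_parser.GGUFMetadataKVs{
	{Key: "general.name", Value: "m"},
	{Key: "general.architecture", Value: "llama"},
}
b := gguf_parser.GGUFMetadataKVs{
	{Key: "general.architecture", Value: "llama"},
	{Key: "general.name", Value: "m"},
}
fmt.Println(computeKVMetadataHash(a) == computeKVMetadataHash(b)) // expected: true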
// extractVersion attempts to extract version from metadata KV pairs
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
for _, kv := range kvs {
@ -105,14 +120,7 @@ func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
}
}
}
return unknownGGUFData
}
// extractVersionFromName tries to extract version from model name
func extractVersionFromName(_ string) string {
// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
// For now, return unknown - this could be enhanced with regex
return unknownGGUFData
return ""
}
// extractModelNameFromPath extracts the model name from the file path

View File

@ -4,8 +4,8 @@ package pkg
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
type GGUFFileHeader struct {
// ModelFormat is always "gguf"
ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"`
@ -16,15 +16,9 @@ type GGUFFileHeader struct {
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
// Hash is a content hash of the metadata (for stable global identifiers across remotes)
Hash string `json:"hash,omitempty" cyclonedx:"hash"`
// License is the license identifier (from general.license if present)
License string `json:"license,omitempty" cyclonedx:"license"`
// GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
@ -42,6 +36,10 @@ type GGUFFileHeader struct {
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// TruncatedHeader indicates if the header was truncated during parsing (for very large headers)
TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
// MetadataHash is an xxhash (XXH64) hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted
// into typed fields above) and provides a stable identifier for the model configuration
// across different file locations or remotes. It allows matching identical models even
// when stored in different repositories or with different filenames.
MetadataHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
}
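For orientation, a hypothetical populated value (illustrative contents only; field names follow the struct above and its usage elsewhere in this commit):
// Hypothetical example, not part of this commit.
hdr := pkg.GGUFFileHeader{
	GGUFVersion:  3,
	ModelName:    "llama3-8b-instruct",
	ModelVersion: "3.0",
	License:      "Apache-2.0",
	Architecture: "llama",
	Quantization: "Q4_K_M",
	Parameters:   8030000000,
	TensorCount:  291,
	Header:       map[string]interface{}{"llama.context_length": uint32(8192)},
	MetadataHash: "<xxhash of the full KV metadata, 16 hex chars>",
}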