Compare commits

...

4 Commits

Author SHA1 Message Date
Christopher Phillips
f1839215c6
chore: schema updates with new names
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 15:28:59 -05:00
Christopher Phillips
8706ff8310
chore: more idiomatic copy/reader usage
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 15:18:21 -05:00
Christopher Phillips
e58e6317d2
chore: pr comments
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 15:06:45 -05:00
Christopher Phillips
b1c8478d55
chore: pr comments
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-13 14:57:07 -05:00
11 changed files with 180 additions and 235 deletions

View File

@ -124,7 +124,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"), jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"), jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
) )
func expandLegacyNameVariants(names ...string) []string { func expandLegacyNameVariants(names ...string) []string {

View File

@ -1433,24 +1433,16 @@
], ],
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
}, },
"GgufFileMetadata": { "GgufFileHeader": {
"properties": { "properties": {
"ggufVersion": { "ggufVersion": {
"type": "integer", "type": "integer",
"description": "GGUFVersion is the GGUF format version (e.g., 3)" "description": "GGUFVersion is the GGUF format version (e.g., 3)"
}, },
"modelName": {
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"fileSize": { "fileSize": {
"type": "integer", "type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
}, },
"license": {
"type": "string",
"description": "License is the license identifier (from general.license if present)"
},
"architecture": { "architecture": {
"type": "string", "type": "string",
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")" "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@ -1469,17 +1461,16 @@
}, },
"header": { "header": {
"type": "object", "type": "object",
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication." "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
}, },
"metadataHash": { "metadataHash": {
"type": "string", "type": "string",
"description": "MetadataHash is an xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames." "description": "MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
} }
}, },
"type": "object", "type": "object",
"required": [ "required": [
"ggufVersion", "ggufVersion",
"modelName",
"tensorCount" "tensorCount"
], ],
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file." "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@ -2631,7 +2622,7 @@
"$ref": "#/$defs/ErlangRebarLockEntry" "$ref": "#/$defs/ErlangRebarLockEntry"
}, },
{ {
"$ref": "#/$defs/GgufFileMetadata" "$ref": "#/$defs/GgufFileHeader"
}, },
{ {
"$ref": "#/$defs/GithubActionsUseStatement" "$ref": "#/$defs/GithubActionsUseStatement"

View File

@ -1433,24 +1433,16 @@
], ],
"description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file." "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
}, },
"GgufFileMetadata": { "GgufFileHeader": {
"properties": { "properties": {
"ggufVersion": { "ggufVersion": {
"type": "integer", "type": "integer",
"description": "GGUFVersion is the GGUF format version (e.g., 3)" "description": "GGUFVersion is the GGUF format version (e.g., 3)"
}, },
"modelName": {
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"fileSize": { "fileSize": {
"type": "integer", "type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)" "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
}, },
"license": {
"type": "string",
"description": "License is the license identifier (from general.license if present)"
},
"architecture": { "architecture": {
"type": "string", "type": "string",
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")" "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@ -1469,17 +1461,16 @@
}, },
"header": { "header": {
"type": "object", "type": "object",
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication." "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
}, },
"metadataHash": { "metadataHash": {
"type": "string", "type": "string",
"description": "MetadataHash is an xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames." "description": "MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
} }
}, },
"type": "object", "type": "object",
"required": [ "required": [
"ggufVersion", "ggufVersion",
"modelName",
"tensorCount" "tensorCount"
], ],
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file." "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@ -2631,7 +2622,7 @@
"$ref": "#/$defs/ErlangRebarLockEntry" "$ref": "#/$defs/ErlangRebarLockEntry"
}, },
{ {
"$ref": "#/$defs/GgufFileMetadata" "$ref": "#/$defs/GgufFileHeader"
}, },
{ {
"$ref": "#/$defs/GithubActionsUseStatement" "$ref": "#/$defs/GithubActionsUseStatement"

View File

@ -5,8 +5,6 @@ import (
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@ -37,7 +35,7 @@ func TestGGUFCataloger_Globs(t *testing.T) {
} }
} }
func TestGGUFCataloger_Integration(t *testing.T) { func TestGGUFCataloger(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
setup func(t *testing.T) string setup func(t *testing.T) string
@ -56,6 +54,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
withStringKV("general.license", "Apache-2.0"). withStringKV("general.license", "Apache-2.0").
withStringKV("general.quantization", "Q4_K_M"). withStringKV("general.quantization", "Q4_K_M").
withUint64KV("general.parameter_count", 8030000000). withUint64KV("general.parameter_count", 8030000000).
withStringKV("general.some_random_kv", "foobar").
build() build()
path := filepath.Join(dir, "llama3-8b.gguf") path := filepath.Join(dir, "llama3-8b.gguf")
@ -71,14 +70,53 @@ func TestGGUFCataloger_Integration(t *testing.T) {
pkg.NewLicenseFromFields("Apache-2.0", "", nil), pkg.NewLicenseFromFields("Apache-2.0", "", nil),
), ),
Metadata: pkg.GGUFFileHeader{ Metadata: pkg.GGUFFileHeader{
ModelName: "llama3-8b", Architecture: "llama",
License: "Apache-2.0", Quantization: "Unknown",
Architecture: "llama", Parameters: 0,
Quantization: "Unknown", GGUFVersion: 3,
Parameters: 0, TensorCount: 0,
GGUFVersion: 3, MetadataKeyValuesHash: "6e3d368066455ce4",
TensorCount: 0, RemainingKeyValues: map[string]interface{}{
Header: map[string]interface{}{}, "general.some_random_kv": "foobar",
},
},
},
},
expectedRelationships: nil,
},
{
name: "catalog GGUF file with minimal metadata",
setup: func(t *testing.T) string {
dir := t.TempDir()
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "gpt2").
withStringKV("general.name", "gpt2-small").
withStringKV("gpt2.context_length", "1024").
withUint32KV("gpt2.embedding_length", 768).
build()
path := filepath.Join(dir, "gpt2-small.gguf")
os.WriteFile(path, data, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "gpt2-small",
Version: "",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "9dc6f23591062a27",
RemainingKeyValues: map[string]interface{}{
"gpt2.context_length": "1024",
"gpt2.embedding_length": uint32(768),
},
}, },
}, },
}, },
@ -91,17 +129,12 @@ func TestGGUFCataloger_Integration(t *testing.T) {
fixtureDir := tt.setup(t) fixtureDir := tt.setup(t)
// Use pkgtest to catalog and compare // Use pkgtest to catalog and compare
tester := pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromDirectory(t, fixtureDir). FromDirectory(t, fixtureDir).
Expects(tt.expectedPackages, tt.expectedRelationships). Expects(tt.expectedPackages, tt.expectedRelationships).
IgnoreLocationLayer(). IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger IgnorePackageFields("FoundBy", "Locations").
WithCompareOptions( TestCataloger(t, NewGGUFCataloger())
// Ignore MetadataHash as it's computed dynamically
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
)
tester.TestCataloger(t, NewGGUFCataloger())
}) })
} }
} }

View File

@ -5,23 +5,17 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package { func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
p := pkg.Package{ p := pkg.Package{
Name: metadata.ModelName, Name: modelName,
Version: version, Version: version,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg, Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(), Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
Metadata: *metadata, Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec // NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages // has not yet finalized support for ML model packages
} }
// Add license to the package if present in metadata
if metadata.License != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
}
p.SetID() p.SetID()
return p return p

View File

@ -3,121 +3,119 @@ package ai
import ( import (
"testing" "testing"
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
) )
func TestNewGGUFPackage(t *testing.T) { func TestNewGGUFPackage(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
metadata *pkg.GGUFFileHeader metadata *pkg.GGUFFileHeader
version string input struct {
locations []file.Location modelName string
checkFunc func(t *testing.T, p pkg.Package) version string
license string
locations []file.Location
}
expected pkg.Package
}{ }{
{ {
name: "complete GGUF package with all fields", name: "complete GGUF package with all fields",
version: "3.0", input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "llama3-8b",
version: "3.0",
license: "Apache-2.0",
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
},
metadata: &pkg.GGUFFileHeader{ metadata: &pkg.GGUFFileHeader{
ModelName: "llama3-8b-instruct",
License: "Apache-2.0",
Architecture: "llama", Architecture: "llama",
Quantization: "Q4_K_M", Quantization: "Q4_K_M",
Parameters: 8030000000, Parameters: 8030000000,
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 291, TensorCount: 291,
Header: map[string]any{}, RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
}, },
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")}, expected: pkg.Package{
checkFunc: func(t *testing.T, p pkg.Package) { Name: "llama3-8b",
if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" { Version: "3.0",
t.Errorf("Name mismatch (-want +got):\n%s", d) Type: pkg.ModelPkg,
} Licenses: pkg.NewLicenseSet(
if d := cmp.Diff("3.0", p.Version); d != "" { pkg.NewLicenseFromFields("Apache-2.0", "", nil),
t.Errorf("Version mismatch (-want +got):\n%s", d) ),
} Metadata: pkg.GGUFFileHeader{
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" { Architecture: "llama",
t.Errorf("Type mismatch (-want +got):\n%s", d) Quantization: "Q4_K_M",
} Parameters: 8030000000,
assert.Empty(t, p.PURL, "PURL should not be set for model packages") GGUFVersion: 3,
assert.Len(t, p.Licenses.ToSlice(), 1) TensorCount: 291,
if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" { RemainingKeyValues: map[string]any{
t.Errorf("License value mismatch (-want +got):\n%s", d) "general.random_kv": "foobar",
} },
assert.NotEmpty(t, p.ID()) },
Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
}, },
}, },
{ {
name: "minimal GGUF package", name: "minimal GGUF package",
version: "1.0", input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "gpt2-small",
version: "1.0",
license: "MIT",
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
},
metadata: &pkg.GGUFFileHeader{ metadata: &pkg.GGUFFileHeader{
ModelName: "simple-model",
Architecture: "gpt2", Architecture: "gpt2",
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 50, TensorCount: 50,
}, },
locations: []file.Location{file.NewLocation("/models/simple.gguf")}, expected: pkg.Package{
checkFunc: func(t *testing.T, p pkg.Package) { Name: "gpt2-small",
if d := cmp.Diff("simple-model", p.Name); d != "" { Version: "1.0",
t.Errorf("Name mismatch (-want +got):\n%s", d) Type: pkg.ModelPkg,
} Licenses: pkg.NewLicenseSet(
if d := cmp.Diff("1.0", p.Version); d != "" { pkg.NewLicenseFromFields("MIT", "", nil),
t.Errorf("Version mismatch (-want +got):\n%s", d) ),
} Metadata: pkg.GGUFFileHeader{
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" { Architecture: "gpt2",
t.Errorf("Type mismatch (-want +got):\n%s", d) GGUFVersion: 3,
} TensorCount: 50,
assert.Empty(t, p.PURL, "PURL should not be set for model packages") },
assert.Empty(t, p.Licenses.ToSlice()) Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
},
},
{
name: "GGUF package with multiple locations",
version: "1.5",
metadata: &pkg.GGUFFileHeader{
ModelName: "multi-location-model",
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
locations: []file.Location{
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
},
checkFunc: func(t *testing.T, p pkg.Package) {
assert.Len(t, p.Locations.ToSlice(), 2)
}, },
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
p := newGGUFPackage(tt.metadata, tt.version, tt.locations...) actual := newGGUFPackage(
tt.metadata,
tt.input.modelName,
tt.input.version,
tt.input.license,
tt.input.locations...,
)
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" { // Verify metadata type
t.Errorf("Name mismatch (-want +got):\n%s", d) _, ok := actual.Metadata.(pkg.GGUFFileHeader)
}
if d := cmp.Diff(tt.version, p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
t.Errorf("Type mismatch (-want +got):\n%s", d)
}
// Verify metadata is attached
metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
require.True(t, ok, "metadata should be GGUFFileHeader") require.True(t, ok, "metadata should be GGUFFileHeader")
if d := cmp.Diff(*tt.metadata, metadata); d != "" {
t.Errorf("Metadata mismatch (-want +got):\n%s", d)
}
if tt.checkFunc != nil { // Use AssertPackagesEqual for comprehensive comparison
tt.checkFunc(t, p) pkgtest.AssertPackagesEqual(t, tt.expected, actual)
}
}) })
} }
} }

View File

@ -14,46 +14,35 @@ const (
maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies maxHeaderSize = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
) )
// readHeader reads only the GGUF header (metadata) without reading tensor data // copyHeader copies the GGUF header from the reader to the writer.
// This is much more efficient than reading the entire file // It validates the magic number first, then copies the rest of the data.
// The reader should be wrapped with io.LimitedReader to prevent OOM issues // The reader should be wrapped with io.LimitedReader to prevent OOM issues.
func readHeader(r io.Reader) ([]byte, error) { func copyHeader(w io.Writer, r io.Reader) error {
// Read initial chunk to determine header size // Read initial chunk to validate magic number
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info // GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
if _, err := io.ReadFull(r, initialBuf); err != nil { if _, err := io.ReadFull(r, initialBuf); err != nil {
return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err) return fmt.Errorf("failed to read GGUF header prefix: %w", err)
} }
// Verify magic number // Verify magic number
magic := binary.LittleEndian.Uint32(initialBuf[0:4]) magic := binary.LittleEndian.Uint32(initialBuf[0:4])
if magic != ggufMagicNumber { if magic != ggufMagicNumber {
return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic) return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
} }
// We need to read the metadata KV pairs to know the full header size // Write the initial buffer to the writer
// The io.LimitedReader wrapping this reader ensures we don't read more than maxHeaderSize if _, err := w.Write(initialBuf); err != nil {
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity return fmt.Errorf("failed to write GGUF header prefix: %w", err)
headerData = append(headerData, initialBuf...) }
// Read the rest of the header in larger chunks for efficiency // Copy the rest of the header from reader to writer
// The LimitedReader will return EOF once maxHeaderSize is reached // The LimitedReader will return EOF once maxHeaderSize is reached
buf := make([]byte, 64*1024) // 64KB chunks if _, err := io.Copy(w, r); err != nil {
for { return fmt.Errorf("failed to copy GGUF header: %w", err)
n, err := r.Read(buf)
if n > 0 {
headerData = append(headerData, buf[:n]...)
}
if err == io.EOF {
// Reached end of file or limit, we have all available data
break
}
if err != nil {
return nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
} }
return headerData, nil return nil
} }
// Helper to convert gguf_parser metadata to simpler types // Helper to convert gguf_parser metadata to simpler types

View File

@ -27,14 +27,6 @@ import (
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
defer internal.CloseAndLogError(reader, reader.Path()) defer internal.CloseAndLogError(reader, reader.Path())
// Read and validate the GGUF file header using LimitedReader to prevent OOM
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
headerData, err := readHeader(limitedReader)
if err != nil {
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
}
// Create a temporary file for the library to parse // Create a temporary file for the library to parse
// The library requires a file path, so we create a temp file // The library requires a file path, so we create a temp file
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf") tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
@ -44,10 +36,12 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
tempPath := tempFile.Name() tempPath := tempFile.Name()
defer os.Remove(tempPath) defer os.Remove(tempPath)
// Write the validated header data to temp file // Copy and validate the GGUF file header using LimitedReader to prevent OOM
if _, err := tempFile.Write(headerData); err != nil { // We use LimitedReader to cap reads at maxHeaderSize (50MB)
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
if err := copyHeader(tempFile, limitedReader); err != nil {
tempFile.Close() tempFile.Close()
return nil, nil, fmt.Errorf("failed to write to temp file: %w", err) return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
} }
tempFile.Close() tempFile.Close()
@ -67,26 +61,26 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Convert to syft metadata structure // Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{ syftMetadata := &pkg.GGUFFileHeader{
ModelName: metadata.Name, Architecture: metadata.Architecture,
License: metadata.License, Quantization: metadata.FileTypeDescriptor,
Architecture: metadata.Architecture, Parameters: uint64(metadata.Parameters),
Quantization: metadata.FileTypeDescriptor, GGUFVersion: uint32(ggufFile.Header.Version),
Parameters: uint64(metadata.Parameters), TensorCount: ggufFile.Header.TensorCount,
GGUFVersion: uint32(ggufFile.Header.Version), RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
TensorCount: ggufFile.Header.TensorCount, MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
} }
// If model name is not in metadata, use filename // If model name is not in metadata, use filename
if syftMetadata.ModelName == "" { if metadata.Name == "" {
syftMetadata.ModelName = extractModelNameFromPath(reader.Path()) metadata.Name = extractModelNameFromPath(reader.Path())
} }
// Create package from metadata // Create package from metadata
p := newGGUFPackage( p := newGGUFPackage(
syftMetadata, syftMetadata,
metadata.Name,
modelVersion, modelVersion,
metadata.License,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
) )

View File

@ -1,41 +0,0 @@
package ai
import (
"fmt"
"os"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
func main() {
// Create a test GGUF file
data := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
// Write to temp file
tempFile, err := os.CreateTemp("", "test-*.gguf")
if err != nil {
panic(err)
}
defer os.Remove(tempFile.Name())
if _, err := tempFile.Write(data); err != nil {
panic(err)
}
tempFile.Close()
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
// Try to parse it
fmt.Println("Attempting to parse...")
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
if err != nil {
fmt.Printf("Parse error: %v\n", err)
return
}
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
}

View File

@ -6,6 +6,7 @@ import (
) )
// GGUF type constants for test builder // GGUF type constants for test builder
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
const ( const (
ggufMagic = 0x46554747 // "GGUF" in little-endian ggufMagic = 0x46554747 // "GGUF" in little-endian
ggufTypeUint8 = 0 ggufTypeUint8 = 0

View File

@ -3,19 +3,14 @@ package pkg
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file. // GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast // GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models. // loading and saving of models, particularly quantized large language models.
// The Model Name, License, and Version fields have all been lifted up to be on the syft Package.
type GGUFFileHeader struct { type GGUFFileHeader struct {
// GGUFVersion is the GGUF format version (e.g., 3) // GGUFVersion is the GGUF format version (e.g., 3)
GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"` GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`
// ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver) // FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"` FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
// License is the license identifier (from general.license if present)
License string `json:"license,omitempty" cyclonedx:"license"`
// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama") // Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"` Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
@ -28,15 +23,15 @@ type GGUFFileHeader struct {
// TensorCount is the number of tensors in the model // TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"` TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// Header contains the remaining key-value pairs from the GGUF header that are not already // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference // represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication. // (namespaced with general.*, llama.*, etc.) while avoiding duplication.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"` RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// MetadataHash is an xx64 hash of all key-value pairs from the GGUF header metadata. // MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted // This hash is computed over the complete header metadata (including the fields extracted
// into typed fields above) and provides a stable identifier for the model configuration // into typed fields above) and provides a stable identifier for the model configuration
// across different file locations or remotes. It allows matching identical models even // across different file locations or remotes. It allows matching identical models even
// when stored in different repositories or with different filenames. // when stored in different repositories or with different filenames.
MetadataHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"` MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
} }