chore: schema updates with new names

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
chore: more idiomatic copy/reader usage
2025-11-17 00:13:15 +01:00 · 2025-11-13 15:28:59 -05:00 · 2025-11-13 15:18:21 -05:00 · 2025-11-13 15:06:45 -05:00 · 2025-11-13 14:57:07 -05:00
11 changed files with 180 additions and 235 deletions
--- a/internal/packagemetadata/names.go
+++ b/internal/packagemetadata/names.go
@@ -124,7 +124,7 @@ var jsonTypes = makeJSONTypes(
 	jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
 	jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
 	jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
-	jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
+	jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
 )

 func expandLegacyNameVariants(names ...string) []string {
--- a/schema/json/schema-16.0.43.json
+++ b/schema/json/schema-16.0.43.json
@@ -1433,24 +1433,16 @@
      ],
      "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
    },
-    "GgufFileMetadata": {
+    "GgufFileHeader": {
      "properties": {
        "ggufVersion": {
          "type": "integer",
          "description": "GGUFVersion is the GGUF format version (e.g., 3)"
        },
-        "modelName": {
-          "type": "string",
-          "description": "ModelName is the name of the model (from general.name or filename)"
-        },
        "fileSize": {
          "type": "integer",
          "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
        },
-        "license": {
-          "type": "string",
-          "description": "License is the license identifier (from general.license if present)"
-        },
        "architecture": {
          "type": "string",
          "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@@ -1469,17 +1461,16 @@
        },
        "header": {
          "type": "object",
-          "description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
+          "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
        },
        "metadataHash": {
          "type": "string",
-          "description": "MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
+          "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
        }
      },
      "type": "object",
      "required": [
        "ggufVersion",
-        "modelName",
        "tensorCount"
      ],
      "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@@ -2631,7 +2622,7 @@
              "$ref": "#/$defs/ErlangRebarLockEntry"
            },
            {
-              "$ref": "#/$defs/GgufFileMetadata"
+              "$ref": "#/$defs/GgufFileHeader"
            },
            {
              "$ref": "#/$defs/GithubActionsUseStatement"
--- a/schema/json/schema-latest.json
+++ b/schema/json/schema-latest.json
@@ -1433,24 +1433,16 @@
      ],
      "description": "FileMetadataEntry contains filesystem-level metadata attributes such as permissions, ownership, type, and size for a cataloged file."
    },
-    "GgufFileMetadata": {
+    "GgufFileHeader": {
      "properties": {
        "ggufVersion": {
          "type": "integer",
          "description": "GGUFVersion is the GGUF format version (e.g., 3)"
        },
-        "modelName": {
-          "type": "string",
-          "description": "ModelName is the name of the model (from general.name or filename)"
-        },
        "fileSize": {
          "type": "integer",
          "description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
        },
-        "license": {
-          "type": "string",
-          "description": "License is the license identifier (from general.license if present)"
-        },
        "architecture": {
          "type": "string",
          "description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
@@ -1469,17 +1461,16 @@
        },
        "header": {
          "type": "object",
-          "description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
+          "description": "RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
        },
        "metadataHash": {
          "type": "string",
-          "description": "MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
+          "description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
        }
      },
      "type": "object",
      "required": [
        "ggufVersion",
-        "modelName",
        "tensorCount"
      ],
      "description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
@@ -2631,7 +2622,7 @@
              "$ref": "#/$defs/ErlangRebarLockEntry"
            },
            {
-              "$ref": "#/$defs/GgufFileMetadata"
+              "$ref": "#/$defs/GgufFileHeader"
            },
            {
              "$ref": "#/$defs/GithubActionsUseStatement"
--- a/syft/pkg/cataloger/ai/cataloger_test.go
+++ b/syft/pkg/cataloger/ai/cataloger_test.go
@@ -5,8 +5,6 @@ import (
 	"path/filepath"
 	"testing"

-	"github.com/google/go-cmp/cmp/cmpopts"
-
 	"github.com/anchore/syft/syft/artifact"
 	"github.com/anchore/syft/syft/pkg"
 	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@@ -37,7 +35,7 @@ func TestGGUFCataloger_Globs(t *testing.T) {
 	}
 }

-func TestGGUFCataloger_Integration(t *testing.T) {
+func TestGGUFCataloger(t *testing.T) {
 	tests := []struct {
 		name                  string
 		setup                 func(t *testing.T) string
@@ -56,6 +54,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
 					withStringKV("general.license", "Apache-2.0").
 					withStringKV("general.quantization", "Q4_K_M").
 					withUint64KV("general.parameter_count", 8030000000).
+					withStringKV("general.some_random_kv", "foobar").
 					build()

 				path := filepath.Join(dir, "llama3-8b.gguf")
@@ -71,14 +70,53 @@ func TestGGUFCataloger_Integration(t *testing.T) {
 						pkg.NewLicenseFromFields("Apache-2.0", "", nil),
 					),
 					Metadata: pkg.GGUFFileHeader{
-						ModelName:    "llama3-8b",
-						License:      "Apache-2.0",
 						Architecture:          "llama",
 						Quantization:          "Unknown",
 						Parameters:            0,
 						GGUFVersion:           3,
 						TensorCount:           0,
-						Header:       map[string]interface{}{},
+						MetadataKeyValuesHash: "6e3d368066455ce4",
+						RemainingKeyValues: map[string]interface{}{
+							"general.some_random_kv": "foobar",
+						},
+					},
+				},
+			},
+			expectedRelationships: nil,
+		},
+		{
+			name: "catalog GGUF file with minimal metadata",
+			setup: func(t *testing.T) string {
+				dir := t.TempDir()
+				data := newTestGGUFBuilder().
+					withVersion(3).
+					withStringKV("general.architecture", "gpt2").
+					withStringKV("general.name", "gpt2-small").
+					withStringKV("gpt2.context_length", "1024").
+					withUint32KV("gpt2.embedding_length", 768).
+					build()
+
+				path := filepath.Join(dir, "gpt2-small.gguf")
+				os.WriteFile(path, data, 0644)
+				return dir
+			},
+			expectedPackages: []pkg.Package{
+				{
+					Name:     "gpt2-small",
+					Version:  "",
+					Type:     pkg.ModelPkg,
+					Licenses: pkg.NewLicenseSet(),
+					Metadata: pkg.GGUFFileHeader{
+						Architecture:          "gpt2",
+						Quantization:          "Unknown",
+						Parameters:            0,
+						GGUFVersion:           3,
+						TensorCount:           0,
+						MetadataKeyValuesHash: "9dc6f23591062a27",
+						RemainingKeyValues: map[string]interface{}{
+							"gpt2.context_length":   "1024",
+							"gpt2.embedding_length": uint32(768),
+						},
 					},
 				},
 			},
@@ -91,17 +129,12 @@ func TestGGUFCataloger_Integration(t *testing.T) {
 			fixtureDir := tt.setup(t)

 			// Use pkgtest to catalog and compare
-			tester := pkgtest.NewCatalogTester().
+			pkgtest.NewCatalogTester().
 				FromDirectory(t, fixtureDir).
 				Expects(tt.expectedPackages, tt.expectedRelationships).
 				IgnoreLocationLayer().
-				IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
-				WithCompareOptions(
-					// Ignore MetadataHash as it's computed dynamically
-					cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
-				)
-
-			tester.TestCataloger(t, NewGGUFCataloger())
+				IgnorePackageFields("FoundBy", "Locations").
+				TestCataloger(t, NewGGUFCataloger())
 		})
 	}
 }
--- a/syft/pkg/cataloger/ai/package.go
+++ b/syft/pkg/cataloger/ai/package.go
@@ -5,23 +5,17 @@ import (
 	"github.com/anchore/syft/syft/pkg"
 )

-func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
+func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license string, locations ...file.Location) pkg.Package {
 	p := pkg.Package{
-		Name:      metadata.ModelName,
+		Name:      modelName,
 		Version:   version,
 		Locations: file.NewLocationSet(locations...),
 		Type:      pkg.ModelPkg,
-		Licenses:  pkg.NewLicenseSet(),
+		Licenses:  pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
 		Metadata:  *metadata,
 		// NOTE: PURL is intentionally not set as the package-url spec
 		// has not yet finalized support for ML model packages
 	}
-
-	// Add license to the package if present in metadata
-	if metadata.License != "" {
-		p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
-	}
-
 	p.SetID()

 	return p
--- a/syft/pkg/cataloger/ai/package_test.go
+++ b/syft/pkg/cataloger/ai/package_test.go
@@ -3,121 +3,119 @@ package ai
 import (
 	"testing"

-	"github.com/google/go-cmp/cmp"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"

 	"github.com/anchore/syft/syft/file"
 	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
 )

 func TestNewGGUFPackage(t *testing.T) {
 	tests := []struct {
 		name     string
 		metadata *pkg.GGUFFileHeader
+		input    struct {
+			modelName string
 			version   string
+			license   string
 			locations []file.Location
-		checkFunc func(t *testing.T, p pkg.Package)
+		}
+		expected pkg.Package
 	}{
 		{
 			name: "complete GGUF package with all fields",
+			input: struct {
+				modelName string
+				version   string
+				license   string
+				locations []file.Location
+			}{
+				modelName: "llama3-8b",
 				version:   "3.0",
+				license:   "Apache-2.0",
+				locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
+			},
 			metadata: &pkg.GGUFFileHeader{
-				ModelName:    "llama3-8b-instruct",
-				License:      "Apache-2.0",
 				Architecture: "llama",
 				Quantization: "Q4_K_M",
 				Parameters:   8030000000,
 				GGUFVersion:  3,
 				TensorCount:  291,
-				Header:       map[string]any{},
+				RemainingKeyValues: map[string]any{
+					"general.random_kv": "foobar",
 				},
-			locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
-			checkFunc: func(t *testing.T, p pkg.Package) {
-				if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
-					t.Errorf("Name mismatch (-want +got):\n%s", d)
-				}
-				if d := cmp.Diff("3.0", p.Version); d != "" {
-					t.Errorf("Version mismatch (-want +got):\n%s", d)
-				}
-				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
-					t.Errorf("Type mismatch (-want +got):\n%s", d)
-				}
-				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
-				assert.Len(t, p.Licenses.ToSlice(), 1)
-				if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
-					t.Errorf("License value mismatch (-want +got):\n%s", d)
-				}
-				assert.NotEmpty(t, p.ID())
+			},
+			expected: pkg.Package{
+				Name:    "llama3-8b",
+				Version: "3.0",
+				Type:    pkg.ModelPkg,
+				Licenses: pkg.NewLicenseSet(
+					pkg.NewLicenseFromFields("Apache-2.0", "", nil),
+				),
+				Metadata: pkg.GGUFFileHeader{
+					Architecture: "llama",
+					Quantization: "Q4_K_M",
+					Parameters:   8030000000,
+					GGUFVersion:  3,
+					TensorCount:  291,
+					RemainingKeyValues: map[string]any{
+						"general.random_kv": "foobar",
+					},
+				},
+				Locations: file.NewLocationSet(file.NewLocation("/models/llama3-8b.gguf")),
 			},
 		},
 		{
 			name: "minimal GGUF package",
+			input: struct {
+				modelName string
+				version   string
+				license   string
+				locations []file.Location
+			}{
+				modelName: "gpt2-small",
 				version:   "1.0",
+				license:   "MIT",
+				locations: []file.Location{file.NewLocation("/models/simple.gguf")},
+			},
 			metadata: &pkg.GGUFFileHeader{
-				ModelName:    "simple-model",
 				Architecture: "gpt2",
 				GGUFVersion:  3,
 				TensorCount:  50,
 			},
-			locations: []file.Location{file.NewLocation("/models/simple.gguf")},
-			checkFunc: func(t *testing.T, p pkg.Package) {
-				if d := cmp.Diff("simple-model", p.Name); d != "" {
-					t.Errorf("Name mismatch (-want +got):\n%s", d)
-				}
-				if d := cmp.Diff("1.0", p.Version); d != "" {
-					t.Errorf("Version mismatch (-want +got):\n%s", d)
-				}
-				if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
-					t.Errorf("Type mismatch (-want +got):\n%s", d)
-				}
-				assert.Empty(t, p.PURL, "PURL should not be set for model packages")
-				assert.Empty(t, p.Licenses.ToSlice())
-			},
-		},
-		{
-			name:    "GGUF package with multiple locations",
-			version: "1.5",
-			metadata: &pkg.GGUFFileHeader{
-				ModelName:    "multi-location-model",
-				Architecture: "llama",
+			expected: pkg.Package{
+				Name:    "gpt2-small",
+				Version: "1.0",
+				Type:    pkg.ModelPkg,
+				Licenses: pkg.NewLicenseSet(
+					pkg.NewLicenseFromFields("MIT", "", nil),
+				),
+				Metadata: pkg.GGUFFileHeader{
+					Architecture: "gpt2",
 					GGUFVersion:  3,
-				TensorCount:  150,
+					TensorCount:  50,
 				},
-			locations: []file.Location{
-				file.NewLocation("/models/model1.gguf"),
-				file.NewLocation("/models/model2.gguf"),
-			},
-			checkFunc: func(t *testing.T, p pkg.Package) {
-				assert.Len(t, p.Locations.ToSlice(), 2)
+				Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
 			},
 		},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)
+			actual := newGGUFPackage(
+				tt.metadata,
+				tt.input.modelName,
+				tt.input.version,
+				tt.input.license,
+				tt.input.locations...,
+			)

-			if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
-				t.Errorf("Name mismatch (-want +got):\n%s", d)
-			}
-			if d := cmp.Diff(tt.version, p.Version); d != "" {
-				t.Errorf("Version mismatch (-want +got):\n%s", d)
-			}
-			if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
-				t.Errorf("Type mismatch (-want +got):\n%s", d)
-			}
-
-			// Verify metadata is attached
-			metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
+			// Verify metadata type
+			_, ok := actual.Metadata.(pkg.GGUFFileHeader)
 			require.True(t, ok, "metadata should be GGUFFileHeader")
-			if d := cmp.Diff(*tt.metadata, metadata); d != "" {
-				t.Errorf("Metadata mismatch (-want +got):\n%s", d)
-			}

-			if tt.checkFunc != nil {
-				tt.checkFunc(t, p)
-			}
+			// Use AssertPackagesEqual for comprehensive comparison
+			pkgtest.AssertPackagesEqual(t, tt.expected, actual)
 		})
 	}
 }
--- a/syft/pkg/cataloger/ai/parse_gguf.go
+++ b/syft/pkg/cataloger/ai/parse_gguf.go
@@ -14,46 +14,35 @@ const (
 	maxHeaderSize   = 50 * 1024 * 1024 // 50MB for large tokenizer vocabularies
 )

-// readHeader reads only the GGUF header (metadata) without reading tensor data
-// This is much more efficient than reading the entire file
-// The reader should be wrapped with io.LimitedReader to prevent OOM issues
-func readHeader(r io.Reader) ([]byte, error) {
-	// Read initial chunk to determine header size
+// copyHeader copies the GGUF header from the reader to the writer.
+// It validates the magic number first, then copies the rest of the data.
+// The reader should be wrapped with io.LimitedReader to prevent OOM issues.
+func copyHeader(w io.Writer, r io.Reader) error {
+	// Read initial chunk to validate magic number
 	// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
 	initialBuf := make([]byte, 24) // Enough for magic, version, tensor count, and kv count
 	if _, err := io.ReadFull(r, initialBuf); err != nil {
-		return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
+		return fmt.Errorf("failed to read GGUF header prefix: %w", err)
 	}

 	// Verify magic number
 	magic := binary.LittleEndian.Uint32(initialBuf[0:4])
 	if magic != ggufMagicNumber {
-		return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
+		return fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
 	}

-	// We need to read the metadata KV pairs to know the full header size
-	// The io.LimitedReader wrapping this reader ensures we don't read more than maxHeaderSize
-	headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
-	headerData = append(headerData, initialBuf...)
+	// Write the initial buffer to the writer
+	if _, err := w.Write(initialBuf); err != nil {
+		return fmt.Errorf("failed to write GGUF header prefix: %w", err)
+	}

-	// Read the rest of the header in larger chunks for efficiency
+	// Copy the rest of the header from reader to writer
 	// The LimitedReader will return EOF once maxHeaderSize is reached
-	buf := make([]byte, 64*1024) // 64KB chunks
-	for {
-		n, err := r.Read(buf)
-		if n > 0 {
-			headerData = append(headerData, buf[:n]...)
-		}
-		if err == io.EOF {
-			// Reached end of file or limit, we have all available data
-			break
-		}
-		if err != nil {
-			return nil, fmt.Errorf("failed to read GGUF header: %w", err)
-		}
+	if _, err := io.Copy(w, r); err != nil {
+		return fmt.Errorf("failed to copy GGUF header: %w", err)
 	}

-	return headerData, nil
+	return nil
 }

 // Helper to convert gguf_parser metadata to simpler types
--- a/syft/pkg/cataloger/ai/parse_gguf_model.go
+++ b/syft/pkg/cataloger/ai/parse_gguf_model.go
@@ -27,14 +27,6 @@ import (
 func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
 	defer internal.CloseAndLogError(reader, reader.Path())

-	// Read and validate the GGUF file header using LimitedReader to prevent OOM
-	// We use LimitedReader to cap reads at maxHeaderSize (50MB)
-	limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
-	headerData, err := readHeader(limitedReader)
-	if err != nil {
-		return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
-	}
-
 	// Create a temporary file for the library to parse
 	// The library requires a file path, so we create a temp file
 	tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
@@ -44,10 +36,12 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
 	tempPath := tempFile.Name()
 	defer os.Remove(tempPath)

-	// Write the validated header data to temp file
-	if _, err := tempFile.Write(headerData); err != nil {
+	// Copy and validate the GGUF file header using LimitedReader to prevent OOM
+	// We use LimitedReader to cap reads at maxHeaderSize (50MB)
+	limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
+	if err := copyHeader(tempFile, limitedReader); err != nil {
 		tempFile.Close()
-		return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
+		return nil, nil, fmt.Errorf("failed to copy GGUF header: %w", err)
 	}
 	tempFile.Close()

@@ -67,26 +61,26 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,

 	// Convert to syft metadata structure
 	syftMetadata := &pkg.GGUFFileHeader{
-		ModelName:    metadata.Name,
-		License:      metadata.License,
 		Architecture:          metadata.Architecture,
 		Quantization:          metadata.FileTypeDescriptor,
 		Parameters:            uint64(metadata.Parameters),
 		GGUFVersion:           uint32(ggufFile.Header.Version),
 		TensorCount:           ggufFile.Header.TensorCount,
-		Header:       convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
-		MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
+		RemainingKeyValues:    convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
+		MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
 	}

 	// If model name is not in metadata, use filename
-	if syftMetadata.ModelName == "" {
-		syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
+	if metadata.Name == "" {
+		metadata.Name = extractModelNameFromPath(reader.Path())
 	}

 	// Create package from metadata
 	p := newGGUFPackage(
 		syftMetadata,
+		metadata.Name,
 		modelVersion,
+		metadata.License,
 		reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
 	)

--- a/syft/pkg/cataloger/ai/test_builder_test.go
+++ b/syft/pkg/cataloger/ai/test_builder_test.go
@@ -1,41 +0,0 @@
-package ai
-
-import (
-	"fmt"
-	"os"
-
-	gguf_parser "github.com/gpustack/gguf-parser-go"
-)
-
-func main() {
-	// Create a test GGUF file
-	data := newTestGGUFBuilder().
-		withVersion(3).
-		withStringKV("general.architecture", "llama").
-		withStringKV("general.name", "test-model").
-		build()
-
-	// Write to temp file
-	tempFile, err := os.CreateTemp("", "test-*.gguf")
-	if err != nil {
-		panic(err)
-	}
-	defer os.Remove(tempFile.Name())
-
-	if _, err := tempFile.Write(data); err != nil {
-		panic(err)
-	}
-	tempFile.Close()
-
-	fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
-
-	// Try to parse it
-	fmt.Println("Attempting to parse...")
-	gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
-	if err != nil {
-		fmt.Printf("Parse error: %v\n", err)
-		return
-	}
-
-	fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
-}
--- a/syft/pkg/cataloger/ai/test_helpers_test.go
+++ b/syft/pkg/cataloger/ai/test_helpers_test.go
@@ -6,6 +6,7 @@ import (
 )

 // GGUF type constants for test builder
+// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
 const (
 	ggufMagic       = 0x46554747 // "GGUF" in little-endian
 	ggufTypeUint8   = 0
--- a/syft/pkg/gguf.go
+++ b/syft/pkg/gguf.go
@@ -3,19 +3,14 @@ package pkg
 // GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
 // GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
 // loading and saving of models, particularly quantized large language models.
+// The Model Name, License, and Version fields have all been lifted up to be on the syft Package.
 type GGUFFileHeader struct {
 	// GGUFVersion is the GGUF format version (e.g., 3)
 	GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

-	// ModelName is the name of the model (from general.name or filename)
-	ModelName string `json:"modelName" cyclonedx:"modelName"`
-
 	// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
 	FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

-	// License is the license identifier (from general.license if present)
-	License string `json:"license,omitempty" cyclonedx:"license"`
-
 	// Architecture is the model architecture (from general.architecture, e.g., "qwen3moe", "llama")
 	Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

@@ -28,15 +23,15 @@ type GGUFFileHeader struct {
 	// TensorCount is the number of tensors in the model
 	TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

-	// Header contains the remaining key-value pairs from the GGUF header that are not already
+	// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
 	// represented as typed fields above. This preserves additional metadata fields for reference
 	// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
-	Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
+	RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

-	// MetadataHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
+	// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
 	// This hash is computed over the complete header metadata (including the fields extracted
 	// into typed fields above) and provides a stable identifier for the model configuration
 	// across different file locations or remotes. It allows matching identical models even
 	// when stored in different repositories or with different filenames.
-	MetadataHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
+	MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
 }
Author	SHA1	Message	Date
Christopher Phillips	f1839215c6	chore: schema updates with new names Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>	2025-11-13 15:28:59 -05:00
Christopher Phillips	8706ff8310	chore: more idiomatic copy/reader usage Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>	2025-11-13 15:18:21 -05:00
Christopher Phillips	e58e6317d2	chore: pr comments Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>	2025-11-13 15:06:45 -05:00
Christopher Phillips	b1c8478d55	chore: pr comments Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>	2025-11-13 14:57:07 -05:00