chore: address PR review comments

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2025-11-17 08:23:15 +01:00 · 2025-11-13 15:06:45 -05:00 · 2025-11-13 15:06:45 -05:00 · e58e6317d2
commit e58e6317d2
parent b1c8478d55
5 changed files with 9 additions and 126 deletions
--- a/syft/pkg/cataloger/ai/cataloger_test.go
+++ b/syft/pkg/cataloger/ai/cataloger_test.go
@@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) {
 			},
 			expectedRelationships: nil,
 		},
-		{
-			name: "catalog multiple GGUF files",
-			setup: func(t *testing.T) string {
-				dir := t.TempDir()
-
-				// First model - Llama with custom training data
-				data1 := newTestGGUFBuilder().
-					withVersion(3).
-					withStringKV("general.architecture", "llama").
-					withStringKV("general.name", "model-1").
-					withStringKV("general.version", "1.0").
-					withStringKV("llama.attention.head_count", "32").
-					withUint32KV("llama.attention.layer_norm_rms_epsilon", 999).
-					build()
-				os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644)
-
-				// Second model - GPT2 with different config
-				data2 := newTestGGUFBuilder().
-					withVersion(3).
-					withStringKV("general.architecture", "gpt2").
-					withStringKV("general.name", "model-2").
-					withStringKV("general.version", "2.0").
-					withStringKV("general.license", "MIT").
-					withStringKV("gpt2.block_count", "12").
-					withUint64KV("tokenizer.ggml.bos_token_id", 50256).
-					build()
-				os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644)
-
-				return dir
-			},
-			expectedPackages: []pkg.Package{
-				{
-					Name:    "model-1",
-					Version: "1.0",
-					Type:    pkg.ModelPkg,
-					Licenses: pkg.NewLicenseSet(),
-					Metadata: pkg.GGUFFileHeader{
-						Architecture:          "llama",
-						Quantization:          "Unknown",
-						Parameters:            0,
-						GGUFVersion:           3,
-						TensorCount:           0,
-						MetadataKeyValuesHash: "57e0dbea7d2efa6e",
-						Header: map[string]interface{}{
-							"llama.attention.head_count":              "32",
-							"llama.attention.layer_norm_rms_epsilon": uint32(999),
-						},
-					},
-				},
-				{
-					Name:    "model-2",
-					Version: "2.0",
-					Type:    pkg.ModelPkg,
-					Licenses: pkg.NewLicenseSet(
-						pkg.NewLicenseFromFields("MIT", "", nil),
-					),
-					Metadata: pkg.GGUFFileHeader{
-						Architecture:          "gpt2",
-						Quantization:          "Unknown",
-						Parameters:            0,
-						GGUFVersion:           3,
-						TensorCount:           0,
-						MetadataKeyValuesHash: "f85de1bf9be304bb",
-						Header: map[string]interface{}{
-							"gpt2.block_count":            "12",
-							"tokenizer.ggml.bos_token_id": uint64(50256),
-						},
-					},
-				},
-			},
-			expectedRelationships: nil,
-		},
 	}

 	for _, tt := range tests {
--- a/syft/pkg/cataloger/ai/package.go
+++ b/syft/pkg/cataloger/ai/package.go
@@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
 		Version:   version,
 		Locations: file.NewLocationSet(locations...),
 		Type:      pkg.ModelPkg,
-		Licenses:  pkg.NewLicenseSet(),
+		Licenses:  pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
 		Metadata:  *metadata,
 		// NOTE: PURL is intentionally not set as the package-url spec
 		// has not yet finalized support for ML model packages
 	}
-
-	// Add license to the package if present in metadata
-	if license != "" {
-		p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil))
-	}
-
 	p.SetID()

 	return p
--- a/syft/pkg/cataloger/ai/package_test.go
+++ b/syft/pkg/cataloger/ai/package_test.go
@@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) {
 				Parameters:   8030000000,
 				GGUFVersion:  3,
 				TensorCount:  291,
-				Header: map[string]any{
+				RemainingKeyValues: map[string]any{
 					"general.random_kv": "foobar",
 				},
 			},
@@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) {
 					Parameters:   8030000000,
 					GGUFVersion:  3,
 					TensorCount:  291,
-					Header: map[string]any{
+					RemainingKeyValues: map[string]any{
 						"general.random_kv": "foobar",
 					},
 				},
@@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) {
 				Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
 			},
 		},
-		{
-			name: "GGUF package with multiple locations",
-			input: struct {
-				modelName string
-				version   string
-				license   string
-				locations []file.Location
-			}{
-				modelName: "llama-multi",
-				version:   "2.0",
-				license:   "Apache-2.0",
-				locations: []file.Location{
-					file.NewLocation("/models/model1.gguf"),
-					file.NewLocation("/models/model2.gguf"),
-				},
-			},
-			metadata: &pkg.GGUFFileHeader{
-				Architecture: "llama",
-				GGUFVersion:  3,
-				TensorCount:  150,
-			},
-			expected: pkg.Package{
-				Name:    "llama-multi",
-				Version: "2.0",
-				Type:    pkg.ModelPkg,
-				Licenses: pkg.NewLicenseSet(
-					pkg.NewLicenseFromFields("Apache-2.0", "", nil),
-				),
-				Metadata: pkg.GGUFFileHeader{
-					Architecture: "llama",
-					GGUFVersion:  3,
-					TensorCount:  150,
-				},
-				Locations: file.NewLocationSet(
-					file.NewLocation("/models/model1.gguf"),
-					file.NewLocation("/models/model2.gguf"),
-				),
-			},
-		},
 	}

 	for _, tt := range tests {
--- a/syft/pkg/cataloger/ai/parse_gguf_model.go
+++ b/syft/pkg/cataloger/ai/parse_gguf_model.go
@@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
 		Parameters:            uint64(metadata.Parameters),
 		GGUFVersion:           uint32(ggufFile.Header.Version),
 		TensorCount:           ggufFile.Header.TensorCount,
-		Header:                convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
+		RemainingKeyValues:    convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
 		MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
 	}

--- a/syft/pkg/gguf.go
+++ b/syft/pkg/gguf.go
@@ -23,10 +23,10 @@ type GGUFFileHeader struct {
 	// TensorCount is the number of tensors in the model
 	TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

-	// Header contains the remaining key-value pairs from the GGUF header that are not already
+	// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
 	// represented as typed fields above. This preserves additional metadata fields for reference
 	// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
-	Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
+	RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

 	// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
 	// This hash is computed over the complete header metadata (including the fields extracted