From e58e6317d20da4083fe102c79f3a223803cc047c Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Thu, 13 Nov 2025 15:06:45 -0500 Subject: [PATCH] chore: pr comments Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- syft/pkg/cataloger/ai/cataloger_test.go | 78 +---------------------- syft/pkg/cataloger/ai/package.go | 8 +-- syft/pkg/cataloger/ai/package_test.go | 43 +------------ syft/pkg/cataloger/ai/parse_gguf_model.go | 2 +- syft/pkg/gguf.go | 4 +- 5 files changed, 9 insertions(+), 126 deletions(-) diff --git a/syft/pkg/cataloger/ai/cataloger_test.go b/syft/pkg/cataloger/ai/cataloger_test.go index 5d9dda2ba..e71f2ece3 100644 --- a/syft/pkg/cataloger/ai/cataloger_test.go +++ b/syft/pkg/cataloger/ai/cataloger_test.go @@ -102,9 +102,9 @@ func TestGGUFCataloger(t *testing.T) { }, expectedPackages: []pkg.Package{ { - Name: "gpt2-small", - Version: "", - Type: pkg.ModelPkg, + Name: "gpt2-small", + Version: "", + Type: pkg.ModelPkg, Licenses: pkg.NewLicenseSet(), Metadata: pkg.GGUFFileHeader{ Architecture: "gpt2", @@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) { }, expectedRelationships: nil, }, - { - name: "catalog multiple GGUF files", - setup: func(t *testing.T) string { - dir := t.TempDir() - - // First model - Llama with custom training data - data1 := newTestGGUFBuilder(). - withVersion(3). - withStringKV("general.architecture", "llama"). - withStringKV("general.name", "model-1"). - withStringKV("general.version", "1.0"). - withStringKV("llama.attention.head_count", "32"). - withUint32KV("llama.attention.layer_norm_rms_epsilon", 999). - build() - os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644) - - // Second model - GPT2 with different config - data2 := newTestGGUFBuilder(). - withVersion(3). - withStringKV("general.architecture", "gpt2"). - withStringKV("general.name", "model-2"). - withStringKV("general.version", "2.0"). - withStringKV("general.license", "MIT"). - withStringKV("gpt2.block_count", "12"). - withUint64KV("tokenizer.ggml.bos_token_id", 50256). - build() - os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644) - - return dir - }, - expectedPackages: []pkg.Package{ - { - Name: "model-1", - Version: "1.0", - Type: pkg.ModelPkg, - Licenses: pkg.NewLicenseSet(), - Metadata: pkg.GGUFFileHeader{ - Architecture: "llama", - Quantization: "Unknown", - Parameters: 0, - GGUFVersion: 3, - TensorCount: 0, - MetadataKeyValuesHash: "57e0dbea7d2efa6e", - Header: map[string]interface{}{ - "llama.attention.head_count": "32", - "llama.attention.layer_norm_rms_epsilon": uint32(999), - }, - }, - }, - { - Name: "model-2", - Version: "2.0", - Type: pkg.ModelPkg, - Licenses: pkg.NewLicenseSet( - pkg.NewLicenseFromFields("MIT", "", nil), - ), - Metadata: pkg.GGUFFileHeader{ - Architecture: "gpt2", - Quantization: "Unknown", - Parameters: 0, - GGUFVersion: 3, - TensorCount: 0, - MetadataKeyValuesHash: "f85de1bf9be304bb", - Header: map[string]interface{}{ - "gpt2.block_count": "12", - "tokenizer.ggml.bos_token_id": uint64(50256), - }, - }, - }, - }, - expectedRelationships: nil, - }, } for _, tt := range tests { diff --git a/syft/pkg/cataloger/ai/package.go b/syft/pkg/cataloger/ai/package.go index dfdede42d..67c6570ae 100644 --- a/syft/pkg/cataloger/ai/package.go +++ b/syft/pkg/cataloger/ai/package.go @@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st Version: version, Locations: file.NewLocationSet(locations...), Type: pkg.ModelPkg, - Licenses: pkg.NewLicenseSet(), + Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...), Metadata: *metadata, // NOTE: PURL is intentionally not set as the package-url spec // has not yet finalized support for ML model packages } - - // Add license to the package if present in metadata - if license != "" { - p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil)) - } - p.SetID() return p diff --git a/syft/pkg/cataloger/ai/package_test.go b/syft/pkg/cataloger/ai/package_test.go index 62d124e8b..ea58304d6 100644 --- a/syft/pkg/cataloger/ai/package_test.go +++ b/syft/pkg/cataloger/ai/package_test.go @@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) { Parameters: 8030000000, GGUFVersion: 3, TensorCount: 291, - Header: map[string]any{ + RemainingKeyValues: map[string]any{ "general.random_kv": "foobar", }, }, @@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) { Parameters: 8030000000, GGUFVersion: 3, TensorCount: 291, - Header: map[string]any{ + RemainingKeyValues: map[string]any{ "general.random_kv": "foobar", }, }, @@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) { Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")), }, }, - { - name: "GGUF package with multiple locations", - input: struct { - modelName string - version string - license string - locations []file.Location - }{ - modelName: "llama-multi", - version: "2.0", - license: "Apache-2.0", - locations: []file.Location{ - file.NewLocation("/models/model1.gguf"), - file.NewLocation("/models/model2.gguf"), - }, - }, - metadata: &pkg.GGUFFileHeader{ - Architecture: "llama", - GGUFVersion: 3, - TensorCount: 150, - }, - expected: pkg.Package{ - Name: "llama-multi", - Version: "2.0", - Type: pkg.ModelPkg, - Licenses: pkg.NewLicenseSet( - pkg.NewLicenseFromFields("Apache-2.0", "", nil), - ), - Metadata: pkg.GGUFFileHeader{ - Architecture: "llama", - GGUFVersion: 3, - TensorCount: 150, - }, - Locations: file.NewLocationSet( - file.NewLocation("/models/model1.gguf"), - file.NewLocation("/models/model2.gguf"), - ), - }, - }, } for _, tt := range tests { diff --git a/syft/pkg/cataloger/ai/parse_gguf_model.go b/syft/pkg/cataloger/ai/parse_gguf_model.go index d65537013..a2553a7e6 100644 --- a/syft/pkg/cataloger/ai/parse_gguf_model.go +++ b/syft/pkg/cataloger/ai/parse_gguf_model.go @@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, Parameters: uint64(metadata.Parameters), GGUFVersion: uint32(ggufFile.Header.Version), TensorCount: ggufFile.Header.TensorCount, - Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), + RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), } diff --git a/syft/pkg/gguf.go b/syft/pkg/gguf.go index aea01491d..59c30e075 100644 --- a/syft/pkg/gguf.go +++ b/syft/pkg/gguf.go @@ -23,10 +23,10 @@ type GGUFFileHeader struct { // TensorCount is the number of tensors in the model TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"` - // Header contains the remaining key-value pairs from the GGUF header that are not already + // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already // represented as typed fields above. This preserves additional metadata fields for reference // (namespaced with general.*, llama.*, etc.) while avoiding duplication. - Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"` + RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"` // MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata. // This hash is computed over the complete header metadata (including the fields extracted