chore: pr comments

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2025-11-13 15:06:45 -05:00
parent b1c8478d55
commit e58e6317d2
No known key found for this signature in database
5 changed files with 9 additions and 126 deletions

View File

@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) {
},
expectedRelationships: nil,
},
{
name: "catalog multiple GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
// First model - Llama with custom training data
data1 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "model-1").
withStringKV("general.version", "1.0").
withStringKV("llama.attention.head_count", "32").
withUint32KV("llama.attention.layer_norm_rms_epsilon", 999).
build()
os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644)
// Second model - GPT2 with different config
data2 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "gpt2").
withStringKV("general.name", "model-2").
withStringKV("general.version", "2.0").
withStringKV("general.license", "MIT").
withStringKV("gpt2.block_count", "12").
withUint64KV("tokenizer.ggml.bos_token_id", 50256).
build()
os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "model-1",
Version: "1.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "57e0dbea7d2efa6e",
Header: map[string]interface{}{
"llama.attention.head_count": "32",
"llama.attention.layer_norm_rms_epsilon": uint32(999),
},
},
},
{
Name: "model-2",
Version: "2.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("MIT", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "f85de1bf9be304bb",
Header: map[string]interface{}{
"gpt2.block_count": "12",
"tokenizer.ggml.bos_token_id": uint64(50256),
},
},
},
},
expectedRelationships: nil,
},
}
for _, tt := range tests {

View File

@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
Version: version,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages
}
// Add license to the package if present in metadata
if license != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil))
}
p.SetID()
return p

View File

@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) {
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) {
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{
RemainingKeyValues: map[string]any{
"general.random_kv": "foobar",
},
},
@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) {
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
},
},
{
name: "GGUF package with multiple locations",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "llama-multi",
version: "2.0",
license: "Apache-2.0",
locations: []file.Location{
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
expected: pkg.Package{
Name: "llama-multi",
Version: "2.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
Locations: file.NewLocationSet(
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
),
},
},
}
for _, tt := range tests {

View File

@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
}

View File

@ -23,10 +23,10 @@ type GGUFFileHeader struct {
// TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// Header contains the remaining key-value pairs from the GGUF header that are not already
// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// MetadataKeyValuesHash is an xx64 (64-bit xxHash) hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted