chore: address PR review comments

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2025-11-13 15:06:45 -05:00
parent b1c8478d55
commit e58e6317d2
No known key found for this signature in database
5 changed files with 9 additions and 126 deletions

View File

@ -102,9 +102,9 @@ func TestGGUFCataloger(t *testing.T) {
}, },
expectedPackages: []pkg.Package{ expectedPackages: []pkg.Package{
{ {
Name: "gpt2-small", Name: "gpt2-small",
Version: "", Version: "",
Type: pkg.ModelPkg, Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(), Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{ Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2", Architecture: "gpt2",
@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) {
}, },
expectedRelationships: nil, expectedRelationships: nil,
}, },
{
name: "catalog multiple GGUF files",
setup: func(t *testing.T) string {
dir := t.TempDir()
// First model - Llama with custom training data
data1 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "llama").
withStringKV("general.name", "model-1").
withStringKV("general.version", "1.0").
withStringKV("llama.attention.head_count", "32").
withUint32KV("llama.attention.layer_norm_rms_epsilon", 999).
build()
os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644)
// Second model - GPT2 with different config
data2 := newTestGGUFBuilder().
withVersion(3).
withStringKV("general.architecture", "gpt2").
withStringKV("general.name", "model-2").
withStringKV("general.version", "2.0").
withStringKV("general.license", "MIT").
withStringKV("gpt2.block_count", "12").
withUint64KV("tokenizer.ggml.bos_token_id", 50256).
build()
os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "model-1",
Version: "1.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "57e0dbea7d2efa6e",
Header: map[string]interface{}{
"llama.attention.head_count": "32",
"llama.attention.layer_norm_rms_epsilon": uint32(999),
},
},
},
{
Name: "model-2",
Version: "2.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("MIT", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "gpt2",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
MetadataKeyValuesHash: "f85de1bf9be304bb",
Header: map[string]interface{}{
"gpt2.block_count": "12",
"tokenizer.ggml.bos_token_id": uint64(50256),
},
},
},
},
expectedRelationships: nil,
},
} }
for _, tt := range tests { for _, tt := range tests {

View File

@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
Version: version, Version: version,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg, Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(), Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
Metadata: *metadata, Metadata: *metadata,
// NOTE: PURL is intentionally not set as the package-url spec // NOTE: PURL is intentionally not set as the package-url spec
// has not yet finalized support for ML model packages // has not yet finalized support for ML model packages
} }
// Add license to the package if present in metadata
if license != "" {
p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil))
}
p.SetID() p.SetID()
return p return p

View File

@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) {
Parameters: 8030000000, Parameters: 8030000000,
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 291, TensorCount: 291,
Header: map[string]any{ RemainingKeyValues: map[string]any{
"general.random_kv": "foobar", "general.random_kv": "foobar",
}, },
}, },
@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) {
Parameters: 8030000000, Parameters: 8030000000,
GGUFVersion: 3, GGUFVersion: 3,
TensorCount: 291, TensorCount: 291,
Header: map[string]any{ RemainingKeyValues: map[string]any{
"general.random_kv": "foobar", "general.random_kv": "foobar",
}, },
}, },
@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) {
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")), Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
}, },
}, },
{
name: "GGUF package with multiple locations",
input: struct {
modelName string
version string
license string
locations []file.Location
}{
modelName: "llama-multi",
version: "2.0",
license: "Apache-2.0",
locations: []file.Location{
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
},
},
metadata: &pkg.GGUFFileHeader{
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
expected: pkg.Package{
Name: "llama-multi",
Version: "2.0",
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
},
Locations: file.NewLocationSet(
file.NewLocation("/models/model1.gguf"),
file.NewLocation("/models/model2.gguf"),
),
},
},
} }
for _, tt := range tests { for _, tt := range tests {

View File

@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
Parameters: uint64(metadata.Parameters), Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version), GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount, TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV), RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV), MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
} }

View File

@ -23,10 +23,10 @@ type GGUFFileHeader struct {
// TensorCount is the number of tensors in the model // TensorCount is the number of tensors in the model
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"` TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
// Header contains the remaining key-value pairs from the GGUF header that are not already // RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
// represented as typed fields above. This preserves additional metadata fields for reference // represented as typed fields above. This preserves additional metadata fields for reference
// (namespaced with general.*, llama.*, etc.) while avoiding duplication. // (namespaced with general.*, llama.*, etc.) while avoiding duplication.
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"` RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
// MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata. // MetadataKeyValuesHash is an xx64 hash of all key-value pairs from the GGUF header metadata.
// This hash is computed over the complete header metadata (including the fields extracted // This hash is computed over the complete header metadata (including the fields extracted