mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
chore: pr comments
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
b1c8478d55
commit
e58e6317d2
@ -102,9 +102,9 @@ func TestGGUFCataloger(t *testing.T) {
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "gpt2-small",
|
||||
Version: "",
|
||||
Type: pkg.ModelPkg,
|
||||
Name: "gpt2-small",
|
||||
Version: "",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "gpt2",
|
||||
@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) {
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog multiple GGUF files",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
|
||||
// First model - Llama with custom training data
|
||||
data1 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "model-1").
|
||||
withStringKV("general.version", "1.0").
|
||||
withStringKV("llama.attention.head_count", "32").
|
||||
withUint32KV("llama.attention.layer_norm_rms_epsilon", 999).
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644)
|
||||
|
||||
// Second model - GPT2 with different config
|
||||
data2 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "gpt2").
|
||||
withStringKV("general.name", "model-2").
|
||||
withStringKV("general.version", "2.0").
|
||||
withStringKV("general.license", "MIT").
|
||||
withStringKV("gpt2.block_count", "12").
|
||||
withUint64KV("tokenizer.ggml.bos_token_id", 50256).
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644)
|
||||
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "model-1",
|
||||
Version: "1.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
MetadataKeyValuesHash: "57e0dbea7d2efa6e",
|
||||
Header: map[string]interface{}{
|
||||
"llama.attention.head_count": "32",
|
||||
"llama.attention.layer_norm_rms_epsilon": uint32(999),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "model-2",
|
||||
Version: "2.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("MIT", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "gpt2",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
MetadataKeyValuesHash: "f85de1bf9be304bb",
|
||||
Header: map[string]interface{}{
|
||||
"gpt2.block_count": "12",
|
||||
"tokenizer.ggml.bos_token_id": uint64(50256),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
|
||||
Version: version,
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
|
||||
Metadata: *metadata,
|
||||
// NOTE: PURL is intentionally not set as the package-url spec
|
||||
// has not yet finalized support for ML model packages
|
||||
}
|
||||
|
||||
// Add license to the package if present in metadata
|
||||
if license != "" {
|
||||
p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil))
|
||||
}
|
||||
|
||||
p.SetID()
|
||||
|
||||
return p
|
||||
|
||||
@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]any{
|
||||
RemainingKeyValues: map[string]any{
|
||||
"general.random_kv": "foobar",
|
||||
},
|
||||
},
|
||||
@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]any{
|
||||
RemainingKeyValues: map[string]any{
|
||||
"general.random_kv": "foobar",
|
||||
},
|
||||
},
|
||||
@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "GGUF package with multiple locations",
|
||||
input: struct {
|
||||
modelName string
|
||||
version string
|
||||
license string
|
||||
locations []file.Location
|
||||
}{
|
||||
modelName: "llama-multi",
|
||||
version: "2.0",
|
||||
license: "Apache-2.0",
|
||||
locations: []file.Location{
|
||||
file.NewLocation("/models/model1.gguf"),
|
||||
file.NewLocation("/models/model2.gguf"),
|
||||
},
|
||||
},
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
},
|
||||
expected: pkg.Package{
|
||||
Name: "llama-multi",
|
||||
Version: "2.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
},
|
||||
Locations: file.NewLocationSet(
|
||||
file.NewLocation("/models/model1.gguf"),
|
||||
file.NewLocation("/models/model2.gguf"),
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
||||
Parameters: uint64(metadata.Parameters),
|
||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||
TensorCount: ggufFile.Header.TensorCount,
|
||||
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
|
||||
}
|
||||
|
||||
|
||||
@ -23,10 +23,10 @@ type GGUFFileHeader struct {
|
||||
// TensorCount is the number of tensors in the model
|
||||
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
|
||||
|
||||
// Header contains the remaining key-value pairs from the GGUF header that are not already
|
||||
// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
|
||||
// represented as typed fields above. This preserves additional metadata fields for reference
|
||||
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
|
||||
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
|
||||
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
|
||||
|
||||
// MetadataKeyValuesHash is an xxHash64 hash of all key-value pairs from the GGUF header metadata.
|
||||
// This hash is computed over the complete header metadata (including the fields extracted
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user