mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
chore: pr comments
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
b1c8478d55
commit
e58e6317d2
@ -102,9 +102,9 @@ func TestGGUFCataloger(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expectedPackages: []pkg.Package{
|
expectedPackages: []pkg.Package{
|
||||||
{
|
{
|
||||||
Name: "gpt2-small",
|
Name: "gpt2-small",
|
||||||
Version: "",
|
Version: "",
|
||||||
Type: pkg.ModelPkg,
|
Type: pkg.ModelPkg,
|
||||||
Licenses: pkg.NewLicenseSet(),
|
Licenses: pkg.NewLicenseSet(),
|
||||||
Metadata: pkg.GGUFFileHeader{
|
Metadata: pkg.GGUFFileHeader{
|
||||||
Architecture: "gpt2",
|
Architecture: "gpt2",
|
||||||
@ -122,78 +122,6 @@ func TestGGUFCataloger(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expectedRelationships: nil,
|
expectedRelationships: nil,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "catalog multiple GGUF files",
|
|
||||||
setup: func(t *testing.T) string {
|
|
||||||
dir := t.TempDir()
|
|
||||||
|
|
||||||
// First model - Llama with custom training data
|
|
||||||
data1 := newTestGGUFBuilder().
|
|
||||||
withVersion(3).
|
|
||||||
withStringKV("general.architecture", "llama").
|
|
||||||
withStringKV("general.name", "model-1").
|
|
||||||
withStringKV("general.version", "1.0").
|
|
||||||
withStringKV("llama.attention.head_count", "32").
|
|
||||||
withUint32KV("llama.attention.layer_norm_rms_epsilon", 999).
|
|
||||||
build()
|
|
||||||
os.WriteFile(filepath.Join(dir, "model-1.gguf"), data1, 0644)
|
|
||||||
|
|
||||||
// Second model - GPT2 with different config
|
|
||||||
data2 := newTestGGUFBuilder().
|
|
||||||
withVersion(3).
|
|
||||||
withStringKV("general.architecture", "gpt2").
|
|
||||||
withStringKV("general.name", "model-2").
|
|
||||||
withStringKV("general.version", "2.0").
|
|
||||||
withStringKV("general.license", "MIT").
|
|
||||||
withStringKV("gpt2.block_count", "12").
|
|
||||||
withUint64KV("tokenizer.ggml.bos_token_id", 50256).
|
|
||||||
build()
|
|
||||||
os.WriteFile(filepath.Join(dir, "model-2.gguf"), data2, 0644)
|
|
||||||
|
|
||||||
return dir
|
|
||||||
},
|
|
||||||
expectedPackages: []pkg.Package{
|
|
||||||
{
|
|
||||||
Name: "model-1",
|
|
||||||
Version: "1.0",
|
|
||||||
Type: pkg.ModelPkg,
|
|
||||||
Licenses: pkg.NewLicenseSet(),
|
|
||||||
Metadata: pkg.GGUFFileHeader{
|
|
||||||
Architecture: "llama",
|
|
||||||
Quantization: "Unknown",
|
|
||||||
Parameters: 0,
|
|
||||||
GGUFVersion: 3,
|
|
||||||
TensorCount: 0,
|
|
||||||
MetadataKeyValuesHash: "57e0dbea7d2efa6e",
|
|
||||||
Header: map[string]interface{}{
|
|
||||||
"llama.attention.head_count": "32",
|
|
||||||
"llama.attention.layer_norm_rms_epsilon": uint32(999),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: "model-2",
|
|
||||||
Version: "2.0",
|
|
||||||
Type: pkg.ModelPkg,
|
|
||||||
Licenses: pkg.NewLicenseSet(
|
|
||||||
pkg.NewLicenseFromFields("MIT", "", nil),
|
|
||||||
),
|
|
||||||
Metadata: pkg.GGUFFileHeader{
|
|
||||||
Architecture: "gpt2",
|
|
||||||
Quantization: "Unknown",
|
|
||||||
Parameters: 0,
|
|
||||||
GGUFVersion: 3,
|
|
||||||
TensorCount: 0,
|
|
||||||
MetadataKeyValuesHash: "f85de1bf9be304bb",
|
|
||||||
Header: map[string]interface{}{
|
|
||||||
"gpt2.block_count": "12",
|
|
||||||
"tokenizer.ggml.bos_token_id": uint64(50256),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedRelationships: nil,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
|
|||||||
@ -11,17 +11,11 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
|
|||||||
Version: version,
|
Version: version,
|
||||||
Locations: file.NewLocationSet(locations...),
|
Locations: file.NewLocationSet(locations...),
|
||||||
Type: pkg.ModelPkg,
|
Type: pkg.ModelPkg,
|
||||||
Licenses: pkg.NewLicenseSet(),
|
Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValues(license)...),
|
||||||
Metadata: *metadata,
|
Metadata: *metadata,
|
||||||
// NOTE: PURL is intentionally not set as the package-url spec
|
// NOTE: PURL is intentionally not set as the package-url spec
|
||||||
// has not yet finalized support for ML model packages
|
// has not yet finalized support for ML model packages
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add license to the package if present in metadata
|
|
||||||
if license != "" {
|
|
||||||
p.Licenses.Add(pkg.NewLicenseFromFields(license, "", nil))
|
|
||||||
}
|
|
||||||
|
|
||||||
p.SetID()
|
p.SetID()
|
||||||
|
|
||||||
return p
|
return p
|
||||||
|
|||||||
@ -41,7 +41,7 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
Parameters: 8030000000,
|
Parameters: 8030000000,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 291,
|
TensorCount: 291,
|
||||||
Header: map[string]any{
|
RemainingKeyValues: map[string]any{
|
||||||
"general.random_kv": "foobar",
|
"general.random_kv": "foobar",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -58,7 +58,7 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
Parameters: 8030000000,
|
Parameters: 8030000000,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 291,
|
TensorCount: 291,
|
||||||
Header: map[string]any{
|
RemainingKeyValues: map[string]any{
|
||||||
"general.random_kv": "foobar",
|
"general.random_kv": "foobar",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -98,45 +98,6 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
|
Locations: file.NewLocationSet(file.NewLocation("/models/simple.gguf")),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
name: "GGUF package with multiple locations",
|
|
||||||
input: struct {
|
|
||||||
modelName string
|
|
||||||
version string
|
|
||||||
license string
|
|
||||||
locations []file.Location
|
|
||||||
}{
|
|
||||||
modelName: "llama-multi",
|
|
||||||
version: "2.0",
|
|
||||||
license: "Apache-2.0",
|
|
||||||
locations: []file.Location{
|
|
||||||
file.NewLocation("/models/model1.gguf"),
|
|
||||||
file.NewLocation("/models/model2.gguf"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
metadata: &pkg.GGUFFileHeader{
|
|
||||||
Architecture: "llama",
|
|
||||||
GGUFVersion: 3,
|
|
||||||
TensorCount: 150,
|
|
||||||
},
|
|
||||||
expected: pkg.Package{
|
|
||||||
Name: "llama-multi",
|
|
||||||
Version: "2.0",
|
|
||||||
Type: pkg.ModelPkg,
|
|
||||||
Licenses: pkg.NewLicenseSet(
|
|
||||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
|
||||||
),
|
|
||||||
Metadata: pkg.GGUFFileHeader{
|
|
||||||
Architecture: "llama",
|
|
||||||
GGUFVersion: 3,
|
|
||||||
TensorCount: 150,
|
|
||||||
},
|
|
||||||
Locations: file.NewLocationSet(
|
|
||||||
file.NewLocation("/models/model1.gguf"),
|
|
||||||
file.NewLocation("/models/model2.gguf"),
|
|
||||||
),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
|
|||||||
@ -72,7 +72,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
|||||||
Parameters: uint64(metadata.Parameters),
|
Parameters: uint64(metadata.Parameters),
|
||||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||||
TensorCount: ggufFile.Header.TensorCount,
|
TensorCount: ggufFile.Header.TensorCount,
|
||||||
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
RemainingKeyValues: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||||
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
|
MetadataKeyValuesHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -23,10 +23,10 @@ type GGUFFileHeader struct {
|
|||||||
// TensorCount is the number of tensors in the model
|
// TensorCount is the number of tensors in the model
|
||||||
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
|
TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`
|
||||||
|
|
||||||
// Header contains the remaining key-value pairs from the GGUF header that are not already
|
// RemainingKeyValues contains the remaining key-value pairs from the GGUF header that are not already
|
||||||
// represented as typed fields above. This preserves additional metadata fields for reference
|
// represented as typed fields above. This preserves additional metadata fields for reference
|
||||||
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
|
// (namespaced with general.*, llama.*, etc.) while avoiding duplication.
|
||||||
Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
|
RemainingKeyValues map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`
|
||||||
|
|
||||||
// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
|
// MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.
|
||||||
// This hash is computed over the complete header metadata (including the fields extracted
|
// This hash is computed over the complete header metadata (including the fields extracted
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user