mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
fix: trim fields to only be ones from safetensor header
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
549f526de0
commit
dbf6dd2eb3
@ -4098,7 +4098,7 @@
|
||||
},
|
||||
"architecture": {
|
||||
"type": "string",
|
||||
"description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"), sourced from the Hugging Face config.json\n\"architectures\" array."
|
||||
"description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"). It is not present in the SafeTensors\nheader itself; it is enriched from the companion Hugging Face config.json\n\"architectures\" array when one is found alongside the model."
|
||||
},
|
||||
"quantization": {
|
||||
"type": "string",
|
||||
@ -4116,14 +4116,6 @@
|
||||
"type": "string",
|
||||
"description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")."
|
||||
},
|
||||
"torchDtype": {
|
||||
"type": "string",
|
||||
"description": "TorchDtype is the Hugging Face torch_dtype (e.g., \"bfloat16\", \"float16\")."
|
||||
},
|
||||
"transformersVersion": {
|
||||
"type": "string",
|
||||
"description": "TransformersVersion is the transformers library version recorded in config.json."
|
||||
},
|
||||
"shardCount": {
|
||||
"type": "integer",
|
||||
"description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)."
|
||||
@ -4134,7 +4126,7 @@
|
||||
},
|
||||
"metadataHash": {
|
||||
"type": "string",
|
||||
"description": "MetadataHash is an xxhash of the normalized header metadata, providing a stable\nidentifier for identical model content across repositories or filenames."
|
||||
"description": "MetadataHash is an xxhash over the on-disk SafeTensors header (sorted tensor\nentries + __metadata__). It is derived ONLY from the safetensors file bytes —\nnever from OCI manifest, layer descriptor, or config-blob fields — so the same\nmodel content scanned via a directory source and via an OCI image produces the\nsame value. Treat this as the cross-source content fingerprint."
|
||||
},
|
||||
"parts": {
|
||||
"items": {
|
||||
|
||||
@ -77,16 +77,14 @@ func TestSafeTensorsCataloger(t *testing.T) {
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
Architecture: "LlamaForCausalLM",
|
||||
Quantization: "BF16",
|
||||
Parameters: "16.26K",
|
||||
TensorCount: 2,
|
||||
TorchDtype: "bfloat16",
|
||||
TransformersVersion: "4.40.0",
|
||||
ShardCount: 1,
|
||||
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
||||
MetadataHash: wantHash,
|
||||
Format: "safetensors",
|
||||
Architecture: "LlamaForCausalLM",
|
||||
Quantization: "BF16",
|
||||
Parameters: "16.26K",
|
||||
TensorCount: 2,
|
||||
ShardCount: 1,
|
||||
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
||||
MetadataHash: wantHash,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@ -222,10 +222,7 @@ func mergeAggregatesInto(merged *pkg.SafeTensorsModelInfo, aggregates []pkg.Safe
|
||||
}
|
||||
firstNonEmpty(&merged.Parameters, a.Parameters)
|
||||
firstNonEmpty(&merged.TotalSize, a.TotalSize)
|
||||
firstNonEmpty(&merged.Architecture, a.Architecture)
|
||||
firstNonEmpty(&merged.Quantization, a.Quantization)
|
||||
firstNonEmpty(&merged.TorchDtype, a.TorchDtype)
|
||||
firstNonEmpty(&merged.TransformersVersion, a.TransformersVersion)
|
||||
}
|
||||
}
|
||||
|
||||
@ -516,12 +513,6 @@ func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) {
|
||||
if md.Architecture == "" && len(cfg.Architectures) > 0 {
|
||||
md.Architecture = cfg.Architectures[0]
|
||||
}
|
||||
if md.TorchDtype == "" {
|
||||
md.TorchDtype = cfg.TorchDtype
|
||||
}
|
||||
if md.TransformersVersion == "" {
|
||||
md.TransformersVersion = cfg.TransformersVersion
|
||||
}
|
||||
}
|
||||
|
||||
// pickSafeTensorsName implements the documented naming precedence chain:
|
||||
@ -537,10 +528,8 @@ func pickSafeTensorsName(nameOrPath, fallbackName string) string {
|
||||
|
||||
// hfConfig is a minimal projection of Hugging Face config.json fields.
|
||||
type hfConfig struct {
|
||||
Architectures []string `json:"architectures"`
|
||||
TorchDtype string `json:"torch_dtype"`
|
||||
TransformersVersion string `json:"transformers_version"`
|
||||
NameOrPath string `json:"_name_or_path"`
|
||||
Architectures []string `json:"architectures"`
|
||||
NameOrPath string `json:"_name_or_path"`
|
||||
}
|
||||
|
||||
// readmeFrontmatter holds the subset of YAML frontmatter fields we extract.
|
||||
|
||||
@ -18,8 +18,9 @@ type SafeTensorsModelInfo struct {
|
||||
Format string `json:"format,omitempty" cyclonedx:"format"`
|
||||
|
||||
// Architecture is the model architecture (e.g., "LlamaForCausalLM",
|
||||
// "Qwen3MoeForConditionalGeneration"), sourced from the Hugging Face config.json
|
||||
// "architectures" array.
|
||||
// "Qwen3MoeForConditionalGeneration"). It is not present in the SafeTensors
|
||||
// header itself; it is enriched from the companion Hugging Face config.json
|
||||
// "architectures" array when one is found alongside the model.
|
||||
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
|
||||
|
||||
// Quantization describes tensor precision (e.g., "BF16", "F16", "F32", "INT8").
|
||||
@ -36,12 +37,6 @@ type SafeTensorsModelInfo struct {
|
||||
// (from the Docker AI model config "size" field or the sharded index "total_size").
|
||||
TotalSize string `json:"totalSize,omitempty" cyclonedx:"totalSize"`
|
||||
|
||||
// TorchDtype is the Hugging Face torch_dtype (e.g., "bfloat16", "float16").
|
||||
TorchDtype string `json:"torchDtype,omitempty" cyclonedx:"torchDtype"`
|
||||
|
||||
// TransformersVersion is the transformers library version recorded in config.json.
|
||||
TransformersVersion string `json:"transformersVersion,omitempty" cyclonedx:"transformersVersion"`
|
||||
|
||||
// ShardCount is the number of .safetensors shards for a sharded model (1 for a
|
||||
// single-file model).
|
||||
ShardCount int `json:"shardCount,omitempty" cyclonedx:"shardCount"`
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user