mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
fix: trim fields to only be ones from safetensor header
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
549f526de0
commit
dbf6dd2eb3
@ -4098,7 +4098,7 @@
|
|||||||
},
|
},
|
||||||
"architecture": {
|
"architecture": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"), sourced from the Hugging Face config.json\n\"architectures\" array."
|
"description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"). It is not present in the SafeTensors\nheader itself; it is enriched from the companion Hugging Face config.json\n\"architectures\" array when one is found alongside the model."
|
||||||
},
|
},
|
||||||
"quantization": {
|
"quantization": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -4116,14 +4116,6 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")."
|
"description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")."
|
||||||
},
|
},
|
||||||
"torchDtype": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "TorchDtype is the Hugging Face torch_dtype (e.g., \"bfloat16\", \"float16\")."
|
|
||||||
},
|
|
||||||
"transformersVersion": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "TransformersVersion is the transformers library version recorded in config.json."
|
|
||||||
},
|
|
||||||
"shardCount": {
|
"shardCount": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)."
|
"description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)."
|
||||||
@ -4134,7 +4126,7 @@
|
|||||||
},
|
},
|
||||||
"metadataHash": {
|
"metadataHash": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "MetadataHash is an xxhash of the normalized header metadata, providing a stable\nidentifier for identical model content across repositories or filenames."
|
"description": "MetadataHash is an xxhash over the on-disk SafeTensors header (sorted tensor\nentries + __metadata__). It is derived ONLY from the safetensors file bytes —\nnever from OCI manifest, layer descriptor, or config-blob fields — so the same\nmodel content scanned via a directory source and via an OCI image produces the\nsame value. Treat this as the cross-source content fingerprint."
|
||||||
},
|
},
|
||||||
"parts": {
|
"parts": {
|
||||||
"items": {
|
"items": {
|
||||||
|
|||||||
@ -77,16 +77,14 @@ func TestSafeTensorsCataloger(t *testing.T) {
|
|||||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||||
),
|
),
|
||||||
Metadata: pkg.SafeTensorsModelInfo{
|
Metadata: pkg.SafeTensorsModelInfo{
|
||||||
Format: "safetensors",
|
Format: "safetensors",
|
||||||
Architecture: "LlamaForCausalLM",
|
Architecture: "LlamaForCausalLM",
|
||||||
Quantization: "BF16",
|
Quantization: "BF16",
|
||||||
Parameters: "16.26K",
|
Parameters: "16.26K",
|
||||||
TensorCount: 2,
|
TensorCount: 2,
|
||||||
TorchDtype: "bfloat16",
|
ShardCount: 1,
|
||||||
TransformersVersion: "4.40.0",
|
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
||||||
ShardCount: 1,
|
MetadataHash: wantHash,
|
||||||
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
|
||||||
MetadataHash: wantHash,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@ -222,10 +222,7 @@ func mergeAggregatesInto(merged *pkg.SafeTensorsModelInfo, aggregates []pkg.Safe
|
|||||||
}
|
}
|
||||||
firstNonEmpty(&merged.Parameters, a.Parameters)
|
firstNonEmpty(&merged.Parameters, a.Parameters)
|
||||||
firstNonEmpty(&merged.TotalSize, a.TotalSize)
|
firstNonEmpty(&merged.TotalSize, a.TotalSize)
|
||||||
firstNonEmpty(&merged.Architecture, a.Architecture)
|
|
||||||
firstNonEmpty(&merged.Quantization, a.Quantization)
|
firstNonEmpty(&merged.Quantization, a.Quantization)
|
||||||
firstNonEmpty(&merged.TorchDtype, a.TorchDtype)
|
|
||||||
firstNonEmpty(&merged.TransformersVersion, a.TransformersVersion)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -516,12 +513,6 @@ func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) {
|
|||||||
if md.Architecture == "" && len(cfg.Architectures) > 0 {
|
if md.Architecture == "" && len(cfg.Architectures) > 0 {
|
||||||
md.Architecture = cfg.Architectures[0]
|
md.Architecture = cfg.Architectures[0]
|
||||||
}
|
}
|
||||||
if md.TorchDtype == "" {
|
|
||||||
md.TorchDtype = cfg.TorchDtype
|
|
||||||
}
|
|
||||||
if md.TransformersVersion == "" {
|
|
||||||
md.TransformersVersion = cfg.TransformersVersion
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// pickSafeTensorsName implements the documented naming precedence chain:
|
// pickSafeTensorsName implements the documented naming precedence chain:
|
||||||
@ -537,10 +528,8 @@ func pickSafeTensorsName(nameOrPath, fallbackName string) string {
|
|||||||
|
|
||||||
// hfConfig is a minimal projection of Hugging Face config.json fields.
|
// hfConfig is a minimal projection of Hugging Face config.json fields.
|
||||||
type hfConfig struct {
|
type hfConfig struct {
|
||||||
Architectures []string `json:"architectures"`
|
Architectures []string `json:"architectures"`
|
||||||
TorchDtype string `json:"torch_dtype"`
|
NameOrPath string `json:"_name_or_path"`
|
||||||
TransformersVersion string `json:"transformers_version"`
|
|
||||||
NameOrPath string `json:"_name_or_path"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// readmeFrontmatter holds the subset of YAML frontmatter fields we extract.
|
// readmeFrontmatter holds the subset of YAML frontmatter fields we extract.
|
||||||
|
|||||||
@ -18,8 +18,9 @@ type SafeTensorsModelInfo struct {
|
|||||||
Format string `json:"format,omitempty" cyclonedx:"format"`
|
Format string `json:"format,omitempty" cyclonedx:"format"`
|
||||||
|
|
||||||
// Architecture is the model architecture (e.g., "LlamaForCausalLM",
|
// Architecture is the model architecture (e.g., "LlamaForCausalLM",
|
||||||
// "Qwen3MoeForConditionalGeneration"), sourced from the Hugging Face config.json
|
// "Qwen3MoeForConditionalGeneration"). It is not present in the SafeTensors
|
||||||
// "architectures" array.
|
// header itself; it is enriched from the companion Hugging Face config.json
|
||||||
|
// "architectures" array when one is found alongside the model.
|
||||||
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
|
Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`
|
||||||
|
|
||||||
// Quantization describes tensor precision (e.g., "BF16", "F16", "F32", "INT8").
|
// Quantization describes tensor precision (e.g., "BF16", "F16", "F32", "INT8").
|
||||||
@ -36,12 +37,6 @@ type SafeTensorsModelInfo struct {
|
|||||||
// (from the Docker AI model config "size" field or the sharded index "total_size").
|
// (from the Docker AI model config "size" field or the sharded index "total_size").
|
||||||
TotalSize string `json:"totalSize,omitempty" cyclonedx:"totalSize"`
|
TotalSize string `json:"totalSize,omitempty" cyclonedx:"totalSize"`
|
||||||
|
|
||||||
// TorchDtype is the Hugging Face torch_dtype (e.g., "bfloat16", "float16").
|
|
||||||
TorchDtype string `json:"torchDtype,omitempty" cyclonedx:"torchDtype"`
|
|
||||||
|
|
||||||
// TransformersVersion is the transformers library version recorded in config.json.
|
|
||||||
TransformersVersion string `json:"transformersVersion,omitempty" cyclonedx:"transformersVersion"`
|
|
||||||
|
|
||||||
// ShardCount is the number of .safetensors shards for a sharded model (1 for a
|
// ShardCount is the number of .safetensors shards for a sharded model (1 for a
|
||||||
// single-file model).
|
// single-file model).
|
||||||
ShardCount int `json:"shardCount,omitempty" cyclonedx:"shardCount"`
|
ShardCount int `json:"shardCount,omitempty" cyclonedx:"shardCount"`
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user