diff --git a/schema/json/schema-latest.json b/schema/json/schema-latest.json index b21fcd13a..5329f8893 100644 --- a/schema/json/schema-latest.json +++ b/schema/json/schema-latest.json @@ -4098,7 +4098,7 @@ }, "architecture": { "type": "string", - "description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"), sourced from the Hugging Face config.json\n\"architectures\" array." + "description": "Architecture is the model architecture (e.g., \"LlamaForCausalLM\",\n\"Qwen3MoeForConditionalGeneration\"). It is not present in the SafeTensors\nheader itself; it is enriched from the companion Hugging Face config.json\n\"architectures\" array when one is found alongside the model." }, "quantization": { "type": "string", @@ -4116,14 +4116,6 @@ "type": "string", "description": "TotalSize is the total byte size of tensor data across all shards when known\n(from the Docker AI model config \"size\" field or the sharded index \"total_size\")." }, - "torchDtype": { - "type": "string", - "description": "TorchDtype is the Hugging Face torch_dtype (e.g., \"bfloat16\", \"float16\")." - }, - "transformersVersion": { - "type": "string", - "description": "TransformersVersion is the transformers library version recorded in config.json." - }, "shardCount": { "type": "integer", "description": "ShardCount is the number of .safetensors shards for a sharded model (1 for a\nsingle-file model)." @@ -4134,7 +4126,7 @@ }, "metadataHash": { "type": "string", - "description": "MetadataHash is an xxhash of the normalized header metadata, providing a stable\nidentifier for identical model content across repositories or filenames." + "description": "MetadataHash is an xxhash over the on-disk SafeTensors header (sorted tensor\nentries + __metadata__). It is derived ONLY from the safetensors file bytes —\nnever from OCI manifest, layer descriptor, or config-blob fields — so the same\nmodel content scanned via a directory source and via an OCI image produces the\nsame value. Treat this as the cross-source content fingerprint." }, "parts": { "items": { diff --git a/syft/pkg/cataloger/ai/parse_safetensors_test.go b/syft/pkg/cataloger/ai/parse_safetensors_test.go index 3d141f6d3..50c8ca0fc 100644 --- a/syft/pkg/cataloger/ai/parse_safetensors_test.go +++ b/syft/pkg/cataloger/ai/parse_safetensors_test.go @@ -77,16 +77,14 @@ func TestSafeTensorsCataloger(t *testing.T) { pkg.NewLicenseFromFields("Apache-2.0", "", nil), ), Metadata: pkg.SafeTensorsModelInfo{ - Format: "safetensors", - Architecture: "LlamaForCausalLM", - Quantization: "BF16", - Parameters: "16.26K", - TensorCount: 2, - TorchDtype: "bfloat16", - TransformersVersion: "4.40.0", - ShardCount: 1, - UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}}, - MetadataHash: wantHash, + Format: "safetensors", + Architecture: "LlamaForCausalLM", + Quantization: "BF16", + Parameters: "16.26K", + TensorCount: 2, + ShardCount: 1, + UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}}, + MetadataHash: wantHash, }, }, }, diff --git a/syft/pkg/cataloger/ai/processor.go b/syft/pkg/cataloger/ai/processor.go index 5cf3564d0..f5f9ca307 100644 --- a/syft/pkg/cataloger/ai/processor.go +++ b/syft/pkg/cataloger/ai/processor.go @@ -222,10 +222,7 @@ func mergeAggregatesInto(merged *pkg.SafeTensorsModelInfo, aggregates []pkg.Safe } firstNonEmpty(&merged.Parameters, a.Parameters) firstNonEmpty(&merged.TotalSize, a.TotalSize) - firstNonEmpty(&merged.Architecture, a.Architecture) firstNonEmpty(&merged.Quantization, a.Quantization) - firstNonEmpty(&merged.TorchDtype, a.TorchDtype) - firstNonEmpty(&merged.TransformersVersion, a.TransformersVersion) } } @@ -516,12 +513,6 @@ func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) { if md.Architecture == "" && len(cfg.Architectures) > 0 { md.Architecture = cfg.Architectures[0] } - if md.TorchDtype == "" { - md.TorchDtype = cfg.TorchDtype - } - if md.TransformersVersion == "" { - md.TransformersVersion = cfg.TransformersVersion - } } // pickSafeTensorsName implements the documented naming precedence chain: @@ -537,10 +528,8 @@ func pickSafeTensorsName(nameOrPath, fallbackName string) string { // hfConfig is a minimal projection of Hugging Face config.json fields. type hfConfig struct { - Architectures []string `json:"architectures"` - TorchDtype string `json:"torch_dtype"` - TransformersVersion string `json:"transformers_version"` - NameOrPath string `json:"_name_or_path"` + Architectures []string `json:"architectures"` + NameOrPath string `json:"_name_or_path"` } // readmeFrontmatter holds the subset of YAML frontmatter fields we extract. diff --git a/syft/pkg/safetensors.go b/syft/pkg/safetensors.go index 7dbc0fe58..ea752bd91 100644 --- a/syft/pkg/safetensors.go +++ b/syft/pkg/safetensors.go @@ -18,8 +18,9 @@ type SafeTensorsModelInfo struct { Format string `json:"format,omitempty" cyclonedx:"format"` // Architecture is the model architecture (e.g., "LlamaForCausalLM", - // "Qwen3MoeForConditionalGeneration"), sourced from the Hugging Face config.json - // "architectures" array. + // "Qwen3MoeForConditionalGeneration"). It is not present in the SafeTensors + // header itself; it is enriched from the companion Hugging Face config.json + // "architectures" array when one is found alongside the model. Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"` // Quantization describes tensor precision (e.g., "BF16", "F16", "F32", "INT8"). @@ -36,12 +37,6 @@ type SafeTensorsModelInfo struct { // (from the Docker AI model config "size" field or the sharded index "total_size"). TotalSize string `json:"totalSize,omitempty" cyclonedx:"totalSize"` - // TorchDtype is the Hugging Face torch_dtype (e.g., "bfloat16", "float16"). - TorchDtype string `json:"torchDtype,omitempty" cyclonedx:"torchDtype"` - - // TransformersVersion is the transformers library version recorded in config.json. - TransformersVersion string `json:"transformersVersion,omitempty" cyclonedx:"transformersVersion"` - // ShardCount is the number of .safetensors shards for a sharded model (1 for a // single-file model). ShardCount int `json:"shardCount,omitempty" cyclonedx:"shardCount"`