fix: rename SafeTensorsMetadata -> SafeTensorsModelInfo

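Rename the pkg.SafeTensorsMetadata type to pkg.SafeTensorsModelInfo and
update every reference to it: the known metadata type names, the
AllTypes list, the JSON type name (now "safetensors-model-info"), the
JSON schema definition, and the SafeTensors package constructor,
parsers, OCI config parser, and merge processor.

Also add the safetensors-cataloger entry to the cataloger definitions
and remove a leftover merge-conflict marker from the changelog comments
in the schema version const block.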

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-05-27 10:14:38 -04:00
parent dbee104681
commit 52653e24fc
No known key found for this signature in database
11 changed files with 78 additions and 22 deletions

View File

@ -11,6 +11,5 @@ const (
// 16.1.2 - placeholder for 16.1.2 changelog // 16.1.2 - placeholder for 16.1.2 changelog
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata // 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
// 16.1.4 - add BunLockEntry metadata type for bun.lock support // 16.1.4 - add BunLockEntry metadata type for bun.lock support
>>>>>>> c6e286cb (feat: safe tensors)
) )

View File

@ -27,7 +27,7 @@ var knownNonMetadataTypeNames = strset.New(
var knownMetadataTypeNames = strset.New( var knownMetadataTypeNames = strset.New(
"DotnetPortableExecutableEntry", "DotnetPortableExecutableEntry",
"GGUFFileHeader", "GGUFFileHeader",
"SafeTensorsMetadata", "SafeTensorsModelInfo",
) )
func DiscoverTypeNames() ([]string, error) { func DiscoverTypeNames() ([]string, error) {

View File

@ -65,7 +65,7 @@ func AllTypes() []any {
pkg.RubyGemspec{}, pkg.RubyGemspec{},
pkg.RustBinaryAuditEntry{}, pkg.RustBinaryAuditEntry{},
pkg.RustCargoLockEntry{}, pkg.RustCargoLockEntry{},
pkg.SafeTensorsMetadata{}, pkg.SafeTensorsModelInfo{},
pkg.SnapEntry{}, pkg.SnapEntry{},
pkg.SwiftPackageManagerResolvedEntry{}, pkg.SwiftPackageManagerResolvedEntry{},
pkg.SwiplPackEntry{}, pkg.SwiplPackEntry{},

View File

@ -127,7 +127,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"), jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"), jsonNames(pkg.SafeTensorsModelInfo{}, "safetensors-model-info"),
) )
func expandLegacyNameVariants(names ...string) []string { func expandLegacyNameVariants(names ...string) []string {

View File

@ -2800,7 +2800,7 @@
"$ref": "#/$defs/RustCargoLockEntry" "$ref": "#/$defs/RustCargoLockEntry"
}, },
{ {
"$ref": "#/$defs/SafetensorsMetadata" "$ref": "#/$defs/SafetensorsModelInfo"
}, },
{ {
"$ref": "#/$defs/SnapEntry" "$ref": "#/$defs/SnapEntry"
@ -4090,7 +4090,7 @@
], ],
"description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information." "description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information."
}, },
"SafetensorsMetadata": { "SafetensorsModelInfo": {
"properties": { "properties": {
"format": { "format": {
"type": "string", "type": "string",
@ -4141,14 +4141,14 @@
}, },
"parts": { "parts": {
"items": { "items": {
"$ref": "#/$defs/SafetensorsMetadata" "$ref": "#/$defs/SafetensorsModelInfo"
}, },
"type": "array", "type": "array",
"description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing." "description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing."
} }
}, },
"type": "object", "type": "object",
"description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model." "description": "SafeTensorsModelInfo holds the model details extracted from SafeTensors content."
}, },
"Schema": { "Schema": {
"properties": { "properties": {

View File

@ -44,3 +44,60 @@ catalogers:
default: true default: true
evidence: evidence:
- GGUFFileHeader.MetadataKeyValuesHash - GGUFFileHeader.MetadataKeyValuesHash
- ecosystem: ai # MANUAL
name: safetensors-cataloger # AUTO-GENERATED
type: generic # AUTO-GENERATED
source: # AUTO-GENERATED
file: syft/pkg/cataloger/ai/cataloger.go
function: NewSafeTensorsCataloger
selectors: # AUTO-GENERATED
- ai
- directory
- image
- ml
- model
- package
- safetensors
parsers: # AUTO-GENERATED structure
- function: parseSafeTensorsIndex
detector: # AUTO-GENERATED
method: glob # AUTO-GENERATED
criteria: # AUTO-GENERATED
- '**/*.safetensors.index.json'
capabilities: # MANUAL - preserved across regeneration
- name: license
default: true
- name: dependency.depth
default: []
- name: dependency.edges
default: ""
- name: dependency.kinds
default: []
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: false
- function: parseSafeTensorsFile
detector: # AUTO-GENERATED
method: glob # AUTO-GENERATED
criteria: # AUTO-GENERATED
- '**/*.safetensors'
capabilities: # MANUAL - preserved across regeneration
- name: license
default: true
- name: dependency.depth
default: []
- name: dependency.edges
default: ""
- name: dependency.kinds
default: []
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: true
evidence:
- SafeTensorsModelInfo.MetadataHash

View File

@ -21,7 +21,7 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
return p return p
} }
func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package { func newSafeTensorsPackage(metadata *pkg.SafeTensorsModelInfo, modelName, version, license string, locations ...file.Location) pkg.Package {
p := pkg.Package{ p := pkg.Package{
Name: modelName, Name: modelName,
Version: version, Version: version,

View File

@ -32,7 +32,7 @@ func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic.
return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err) return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err)
} }
md := pkg.SafeTensorsMetadata{ md := pkg.SafeTensorsModelInfo{
Format: "safetensors", Format: "safetensors",
TensorCount: uint64(len(header.tensors)), TensorCount: uint64(len(header.tensors)),
Quantization: normalizeDType(header.dominantDType()), Quantization: normalizeDType(header.dominantDType()),
@ -82,7 +82,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
shards[shard] = struct{}{} shards[shard] = struct{}{}
} }
md := pkg.SafeTensorsMetadata{ md := pkg.SafeTensorsModelInfo{
Format: "safetensors", Format: "safetensors",
TensorCount: uint64(len(doc.WeightMap)), TensorCount: uint64(len(doc.WeightMap)),
ShardCount: len(shards), ShardCount: len(shards),
@ -111,7 +111,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
// safetensors artifact and folds their values into the metadata struct. It // safetensors artifact and folds their values into the metadata struct. It
// returns a name and license derived from those sources, with the caller free // returns a name and license derived from those sources, with the caller free
// to fall back to a filename-derived default. // to fall back to a filename-derived default.
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, license string) { func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsModelInfo) (name, license string) {
if resolver == nil { if resolver == nil {
return "", "" return "", ""
} }

View File

@ -61,7 +61,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
return nil, nil, nil return nil, nil, nil
} }
md := pkg.SafeTensorsMetadata{ md := pkg.SafeTensorsModelInfo{
Format: "safetensors", Format: "safetensors",
Quantization: cfg.Config.Quantization, Quantization: cfg.Config.Quantization,
Parameters: cfg.Config.Parameters, Parameters: cfg.Config.Parameters,
@ -89,7 +89,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
// YAML frontmatter with license + base_model; HF config.json carries // YAML frontmatter with license + base_model; HF config.json carries
// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license // architectures/torch_dtype/transformers_version; the vnd.docker.ai.license
// blob is plain license text. // blob is plain license text.
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) { func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (name, license string) {
ociResolver, ok := resolver.(file.OCIMediaTypeResolver) ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
if !ok { if !ok {
return "", "" return "", ""
@ -113,7 +113,7 @@ func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadat
// readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and // readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and
// passes its contents to classifyAndMerge. Split out from the calling loop so // passes its contents to classifyAndMerge. Split out from the calling loop so
// the resolver handle is closed via defer on every iteration. // the resolver handle is closed via defer on every iteration.
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsMetadata, name, license *string) { func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, name, license *string) {
rc, err := resolver.FileContentsByLocation(loc) rc, err := resolver.FileContentsByLocation(loc)
if err != nil { if err != nil {
return return
@ -130,7 +130,7 @@ func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md
// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md, // classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md,
// config.json, generation_config.json, tokenizer.json, etc.) and folds useful // config.json, generation_config.json, tokenizer.json, etc.) and folds useful
// fields into the metadata struct and out-parameters. // fields into the metadata struct and out-parameters.
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) { func classifyAndMerge(buf []byte, md *pkg.SafeTensorsModelInfo, name, license *string) {
trimmed := trimLeadingWhitespace(buf) trimmed := trimLeadingWhitespace(buf)
switch { switch {
case hasPrefix(trimmed, "---"): case hasPrefix(trimmed, "---"):

View File

@ -72,13 +72,13 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
} }
var namedPkgs []pkg.Package var namedPkgs []pkg.Package
var namelessParts []pkg.SafeTensorsMetadata var namelessParts []pkg.SafeTensorsModelInfo
for _, p := range pkgs { for _, p := range pkgs {
if p.Name != "" { if p.Name != "" {
namedPkgs = append(namedPkgs, p) namedPkgs = append(namedPkgs, p)
continue continue
} }
if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok { if md, ok := p.Metadata.(pkg.SafeTensorsModelInfo); ok {
md.MetadataHash = "" md.MetadataHash = ""
namelessParts = append(namelessParts, md) namelessParts = append(namelessParts, md)
} }
@ -90,7 +90,7 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
if len(namedPkgs) == 1 && len(namelessParts) > 0 { if len(namedPkgs) == 1 && len(namelessParts) > 0 {
winner := &namedPkgs[0] winner := &namedPkgs[0]
if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok { if md, ok := winner.Metadata.(pkg.SafeTensorsModelInfo); ok {
md.Parts = namelessParts md.Parts = namelessParts
winner.Metadata = md winner.Metadata = md
} }

View File

@ -1,6 +1,6 @@
package pkg package pkg
// SafeTensorsMetadata represents metadata extracted from a SafeTensors model. // SafeTensorsModelInfo holds the model details extracted from SafeTensors content.
// SafeTensors is a simple, safe serialization format for storing tensors, used // SafeTensors is a simple, safe serialization format for storing tensors, used
// as the default weight format for Hugging Face transformer models. Syft may // as the default weight format for Hugging Face transformer models. Syft may
// populate this struct from three sources: // populate this struct from three sources:
@ -10,7 +10,7 @@ package pkg
// //
// The Model Name, License, and Version fields have all been lifted up to be on // The Model Name, License, and Version fields have all been lifted up to be on
// the syft Package. // the syft Package.
type SafeTensorsMetadata struct { type SafeTensorsModelInfo struct {
// Format is the source format label (always "safetensors" for this metadata type). // Format is the source format label (always "safetensors" for this metadata type).
// Present because the Docker AI model config blob carries an explicit format field // Present because the Docker AI model config blob carries an explicit format field
// that can also be "gguf", and recording it here makes the origin explicit. // that can also be "gguf", and recording it here makes the origin explicit.
@ -55,5 +55,5 @@ type SafeTensorsMetadata struct {
// Parts contains metadata from additional SafeTensors shards or OCI layers that // Parts contains metadata from additional SafeTensors shards or OCI layers that
// were merged into this package during post-processing. // were merged into this package during post-processing.
Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"` Parts []SafeTensorsModelInfo `json:"parts,omitempty" cyclonedx:"parts"`
} }