fix: rename SafeTensorsMetadata -> SafeTensorsModelInfo

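Rename the SafeTensors package metadata type so that the Go struct, the JSON
type name ("safetensors-metadata" -> "safetensors-model-info"), and the JSON
schema definition (SafetensorsMetadata -> SafetensorsModelInfo) all use the
new name. Update the AI catalogers and the SafeTensors merge processor to
reference the renamed type, and add a safetensors-cataloger entry to the
catalogers YAML.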

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-05-27 10:14:38 -04:00
parent dbee104681
commit 52653e24fc
No known key found for this signature in database
11 changed files with 78 additions and 22 deletions

View File

@ -11,6 +11,5 @@ const (
// 16.1.2 - placeholder for 16.1.2 changelog
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
// 16.1.4 - add BunLockEntry metadata type for bun.lock support
>>>>>>> c6e286cb (feat: safe tensors)
)

View File

@ -27,7 +27,7 @@ var knownNonMetadataTypeNames = strset.New(
var knownMetadataTypeNames = strset.New(
"DotnetPortableExecutableEntry",
"GGUFFileHeader",
"SafeTensorsMetadata",
"SafeTensorsModelInfo",
)
func DiscoverTypeNames() ([]string, error) {

View File

@ -65,7 +65,7 @@ func AllTypes() []any {
pkg.RubyGemspec{},
pkg.RustBinaryAuditEntry{},
pkg.RustCargoLockEntry{},
pkg.SafeTensorsMetadata{},
pkg.SafeTensorsModelInfo{},
pkg.SnapEntry{},
pkg.SwiftPackageManagerResolvedEntry{},
pkg.SwiplPackEntry{},

View File

@ -127,7 +127,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"),
jsonNames(pkg.SafeTensorsModelInfo{}, "safetensors-model-info"),
)
func expandLegacyNameVariants(names ...string) []string {

View File

@ -2800,7 +2800,7 @@
"$ref": "#/$defs/RustCargoLockEntry"
},
{
"$ref": "#/$defs/SafetensorsMetadata"
"$ref": "#/$defs/SafetensorsModelInfo"
},
{
"$ref": "#/$defs/SnapEntry"
@ -4090,7 +4090,7 @@
],
"description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information."
},
"SafetensorsMetadata": {
"SafetensorsModelInfo": {
"properties": {
"format": {
"type": "string",
@ -4141,14 +4141,14 @@
},
"parts": {
"items": {
"$ref": "#/$defs/SafetensorsMetadata"
"$ref": "#/$defs/SafetensorsModelInfo"
},
"type": "array",
"description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing."
}
},
"type": "object",
"description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model."
"description": "SafeTensorsModelInfo holds the model details extracted from SafeTensors content."
},
"Schema": {
"properties": {

View File

@ -44,3 +44,60 @@ catalogers:
default: true
evidence:
- GGUFFileHeader.MetadataKeyValuesHash
- ecosystem: ai # MANUAL
name: safetensors-cataloger # AUTO-GENERATED
type: generic # AUTO-GENERATED
source: # AUTO-GENERATED
file: syft/pkg/cataloger/ai/cataloger.go
function: NewSafeTensorsCataloger
selectors: # AUTO-GENERATED
- ai
- directory
- image
- ml
- model
- package
- safetensors
parsers: # AUTO-GENERATED structure
- function: parseSafeTensorsIndex
detector: # AUTO-GENERATED
method: glob # AUTO-GENERATED
criteria: # AUTO-GENERATED
- '**/*.safetensors.index.json'
capabilities: # MANUAL - preserved across regeneration
- name: license
default: true
- name: dependency.depth
default: []
- name: dependency.edges
default: ""
- name: dependency.kinds
default: []
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: false
- function: parseSafeTensorsFile
detector: # AUTO-GENERATED
method: glob # AUTO-GENERATED
criteria: # AUTO-GENERATED
- '**/*.safetensors'
capabilities: # MANUAL - preserved across regeneration
- name: license
default: true
- name: dependency.depth
default: []
- name: dependency.edges
default: ""
- name: dependency.kinds
default: []
- name: package_manager.files.listing
default: false
- name: package_manager.files.digests
default: false
- name: package_manager.package_integrity_hash
default: true
evidence:
- SafeTensorsModelInfo.MetadataHash

View File

@ -21,7 +21,7 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
return p
}
func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package {
func newSafeTensorsPackage(metadata *pkg.SafeTensorsModelInfo, modelName, version, license string, locations ...file.Location) pkg.Package {
p := pkg.Package{
Name: modelName,
Version: version,

View File

@ -32,7 +32,7 @@ func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic.
return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err)
}
md := pkg.SafeTensorsMetadata{
md := pkg.SafeTensorsModelInfo{
Format: "safetensors",
TensorCount: uint64(len(header.tensors)),
Quantization: normalizeDType(header.dominantDType()),
@ -82,7 +82,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
shards[shard] = struct{}{}
}
md := pkg.SafeTensorsMetadata{
md := pkg.SafeTensorsModelInfo{
Format: "safetensors",
TensorCount: uint64(len(doc.WeightMap)),
ShardCount: len(shards),
@ -111,7 +111,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
// safetensors artifact and folds their values into the metadata struct. It
// returns a name and license derived from those sources, with the caller free
// to fall back to a filename-derived default.
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, license string) {
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsModelInfo) (name, license string) {
if resolver == nil {
return "", ""
}

View File

@ -61,7 +61,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
return nil, nil, nil
}
md := pkg.SafeTensorsMetadata{
md := pkg.SafeTensorsModelInfo{
Format: "safetensors",
Quantization: cfg.Config.Quantization,
Parameters: cfg.Config.Parameters,
@ -89,7 +89,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
// YAML frontmatter with license + base_model; HF config.json carries
// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license
// blob is plain license text.
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) {
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (name, license string) {
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
if !ok {
return "", ""
@ -113,7 +113,7 @@ func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadat
// readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and
// passes its contents to classifyAndMerge. Split out from the calling loop so
// the resolver handle is closed via defer on every iteration.
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsMetadata, name, license *string) {
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, name, license *string) {
rc, err := resolver.FileContentsByLocation(loc)
if err != nil {
return
@ -130,7 +130,7 @@ func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md
// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md,
// config.json, generation_config.json, tokenizer.json, etc.) and folds useful
// fields into the metadata struct and out-parameters.
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) {
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsModelInfo, name, license *string) {
trimmed := trimLeadingWhitespace(buf)
switch {
case hasPrefix(trimmed, "---"):

View File

@ -72,13 +72,13 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
}
var namedPkgs []pkg.Package
var namelessParts []pkg.SafeTensorsMetadata
var namelessParts []pkg.SafeTensorsModelInfo
for _, p := range pkgs {
if p.Name != "" {
namedPkgs = append(namedPkgs, p)
continue
}
if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok {
if md, ok := p.Metadata.(pkg.SafeTensorsModelInfo); ok {
md.MetadataHash = ""
namelessParts = append(namelessParts, md)
}
@ -90,7 +90,7 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
if len(namedPkgs) == 1 && len(namelessParts) > 0 {
winner := &namedPkgs[0]
if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok {
if md, ok := winner.Metadata.(pkg.SafeTensorsModelInfo); ok {
md.Parts = namelessParts
winner.Metadata = md
}

View File

@ -1,6 +1,6 @@
package pkg
// SafeTensorsMetadata represents metadata extracted from a SafeTensors model.
// SafeTensorsModelInfo holds the model details extracted from SafeTensors content.
// SafeTensors is a simple, safe serialization format for storing tensors, used
// as the default weight format for Hugging Face transformer models. Syft may
// populate this struct from three sources:
@ -10,7 +10,7 @@ package pkg
//
// The Model Name, License, and Version fields have all been lifted up to be on
// the syft Package.
type SafeTensorsMetadata struct {
type SafeTensorsModelInfo struct {
// Format is the source format label (always "safetensors" for this metadata type).
// Present because the Docker AI model config blob carries an explicit format field
// that can also be "gguf", and recording it here makes the origin explicit.
@ -55,5 +55,5 @@ type SafeTensorsMetadata struct {
// Parts contains metadata from additional SafeTensors shards or OCI layers that
// were merged into this package during post-processing.
Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"`
Parts []SafeTensorsModelInfo `json:"parts,omitempty" cyclonedx:"parts"`
}