mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
fix: rename SafeTensorsMetadata -> SafeTensorsModelInfo
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
dbee104681
commit
52653e24fc
@ -11,6 +11,5 @@ const (
|
||||
// 16.1.2 - placeholder for 16.1.2 changelog
|
||||
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
|
||||
// 16.1.4 - add BunLockEntry metadata type for bun.lock support
|
||||
>>>>>>> c6e286cb (feat: safe tensors)
|
||||
|
||||
)
|
||||
|
||||
@ -27,7 +27,7 @@ var knownNonMetadataTypeNames = strset.New(
|
||||
var knownMetadataTypeNames = strset.New(
|
||||
"DotnetPortableExecutableEntry",
|
||||
"GGUFFileHeader",
|
||||
"SafeTensorsMetadata",
|
||||
"SafeTensorsModelInfo",
|
||||
)
|
||||
|
||||
func DiscoverTypeNames() ([]string, error) {
|
||||
|
||||
@ -65,7 +65,7 @@ func AllTypes() []any {
|
||||
pkg.RubyGemspec{},
|
||||
pkg.RustBinaryAuditEntry{},
|
||||
pkg.RustCargoLockEntry{},
|
||||
pkg.SafeTensorsMetadata{},
|
||||
pkg.SafeTensorsModelInfo{},
|
||||
pkg.SnapEntry{},
|
||||
pkg.SwiftPackageManagerResolvedEntry{},
|
||||
pkg.SwiplPackEntry{},
|
||||
|
||||
@ -127,7 +127,7 @@ var jsonTypes = makeJSONTypes(
|
||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
|
||||
jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"),
|
||||
jsonNames(pkg.SafeTensorsModelInfo{}, "safetensors-model-info"),
|
||||
)
|
||||
|
||||
func expandLegacyNameVariants(names ...string) []string {
|
||||
|
||||
@ -2800,7 +2800,7 @@
|
||||
"$ref": "#/$defs/RustCargoLockEntry"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/SafetensorsMetadata"
|
||||
"$ref": "#/$defs/SafetensorsModelInfo"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/SnapEntry"
|
||||
@ -4090,7 +4090,7 @@
|
||||
],
|
||||
"description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information."
|
||||
},
|
||||
"SafetensorsMetadata": {
|
||||
"SafetensorsModelInfo": {
|
||||
"properties": {
|
||||
"format": {
|
||||
"type": "string",
|
||||
@ -4141,14 +4141,14 @@
|
||||
},
|
||||
"parts": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/SafetensorsMetadata"
|
||||
"$ref": "#/$defs/SafetensorsModelInfo"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model."
|
||||
"description": "SafeTensorsModelInfo holds the model details extracted from SafeTensors content."
|
||||
},
|
||||
"Schema": {
|
||||
"properties": {
|
||||
|
||||
@ -44,3 +44,60 @@ catalogers:
|
||||
default: true
|
||||
evidence:
|
||||
- GGUFFileHeader.MetadataKeyValuesHash
|
||||
- ecosystem: ai # MANUAL
|
||||
name: safetensors-cataloger # AUTO-GENERATED
|
||||
type: generic # AUTO-GENERATED
|
||||
source: # AUTO-GENERATED
|
||||
file: syft/pkg/cataloger/ai/cataloger.go
|
||||
function: NewSafeTensorsCataloger
|
||||
selectors: # AUTO-GENERATED
|
||||
- ai
|
||||
- directory
|
||||
- image
|
||||
- ml
|
||||
- model
|
||||
- package
|
||||
- safetensors
|
||||
parsers: # AUTO-GENERATED structure
|
||||
- function: parseSafeTensorsIndex
|
||||
detector: # AUTO-GENERATED
|
||||
method: glob # AUTO-GENERATED
|
||||
criteria: # AUTO-GENERATED
|
||||
- '**/*.safetensors.index.json'
|
||||
capabilities: # MANUAL - preserved across regeneration
|
||||
- name: license
|
||||
default: true
|
||||
- name: dependency.depth
|
||||
default: []
|
||||
- name: dependency.edges
|
||||
default: ""
|
||||
- name: dependency.kinds
|
||||
default: []
|
||||
- name: package_manager.files.listing
|
||||
default: false
|
||||
- name: package_manager.files.digests
|
||||
default: false
|
||||
- name: package_manager.package_integrity_hash
|
||||
default: false
|
||||
- function: parseSafeTensorsFile
|
||||
detector: # AUTO-GENERATED
|
||||
method: glob # AUTO-GENERATED
|
||||
criteria: # AUTO-GENERATED
|
||||
- '**/*.safetensors'
|
||||
capabilities: # MANUAL - preserved across regeneration
|
||||
- name: license
|
||||
default: true
|
||||
- name: dependency.depth
|
||||
default: []
|
||||
- name: dependency.edges
|
||||
default: ""
|
||||
- name: dependency.kinds
|
||||
default: []
|
||||
- name: package_manager.files.listing
|
||||
default: false
|
||||
- name: package_manager.files.digests
|
||||
default: false
|
||||
- name: package_manager.package_integrity_hash
|
||||
default: true
|
||||
evidence:
|
||||
- SafeTensorsModelInfo.MetadataHash
|
||||
|
||||
@ -21,7 +21,7 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
|
||||
return p
|
||||
}
|
||||
|
||||
func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package {
|
||||
func newSafeTensorsPackage(metadata *pkg.SafeTensorsModelInfo, modelName, version, license string, locations ...file.Location) pkg.Package {
|
||||
p := pkg.Package{
|
||||
Name: modelName,
|
||||
Version: version,
|
||||
|
||||
@ -32,7 +32,7 @@ func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic.
|
||||
return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err)
|
||||
}
|
||||
|
||||
md := pkg.SafeTensorsMetadata{
|
||||
md := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: uint64(len(header.tensors)),
|
||||
Quantization: normalizeDType(header.dominantDType()),
|
||||
@ -82,7 +82,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
|
||||
shards[shard] = struct{}{}
|
||||
}
|
||||
|
||||
md := pkg.SafeTensorsMetadata{
|
||||
md := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: uint64(len(doc.WeightMap)),
|
||||
ShardCount: len(shards),
|
||||
@ -111,7 +111,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
|
||||
// safetensors artifact and folds their values into the metadata struct. It
|
||||
// returns a name and license derived from those sources, with the caller free
|
||||
// to fall back to a filename-derived default.
|
||||
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, license string) {
|
||||
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsModelInfo) (name, license string) {
|
||||
if resolver == nil {
|
||||
return "", ""
|
||||
}
|
||||
|
||||
@ -61,7 +61,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
md := pkg.SafeTensorsMetadata{
|
||||
md := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
Quantization: cfg.Config.Quantization,
|
||||
Parameters: cfg.Config.Parameters,
|
||||
@ -89,7 +89,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
|
||||
// YAML frontmatter with license + base_model; HF config.json carries
|
||||
// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license
|
||||
// blob is plain license text.
|
||||
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) {
|
||||
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (name, license string) {
|
||||
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
||||
if !ok {
|
||||
return "", ""
|
||||
@ -113,7 +113,7 @@ func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadat
|
||||
// readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and
|
||||
// passes its contents to classifyAndMerge. Split out from the calling loop so
|
||||
// the resolver handle is closed via defer on every iteration.
|
||||
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsMetadata, name, license *string) {
|
||||
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, name, license *string) {
|
||||
rc, err := resolver.FileContentsByLocation(loc)
|
||||
if err != nil {
|
||||
return
|
||||
@ -130,7 +130,7 @@ func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md
|
||||
// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md,
|
||||
// config.json, generation_config.json, tokenizer.json, etc.) and folds useful
|
||||
// fields into the metadata struct and out-parameters.
|
||||
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) {
|
||||
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsModelInfo, name, license *string) {
|
||||
trimmed := trimLeadingWhitespace(buf)
|
||||
switch {
|
||||
case hasPrefix(trimmed, "---"):
|
||||
|
||||
@ -72,13 +72,13 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
|
||||
}
|
||||
|
||||
var namedPkgs []pkg.Package
|
||||
var namelessParts []pkg.SafeTensorsMetadata
|
||||
var namelessParts []pkg.SafeTensorsModelInfo
|
||||
for _, p := range pkgs {
|
||||
if p.Name != "" {
|
||||
namedPkgs = append(namedPkgs, p)
|
||||
continue
|
||||
}
|
||||
if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok {
|
||||
if md, ok := p.Metadata.(pkg.SafeTensorsModelInfo); ok {
|
||||
md.MetadataHash = ""
|
||||
namelessParts = append(namelessParts, md)
|
||||
}
|
||||
@ -90,7 +90,7 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
|
||||
|
||||
if len(namedPkgs) == 1 && len(namelessParts) > 0 {
|
||||
winner := &namedPkgs[0]
|
||||
if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok {
|
||||
if md, ok := winner.Metadata.(pkg.SafeTensorsModelInfo); ok {
|
||||
md.Parts = namelessParts
|
||||
winner.Metadata = md
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
package pkg
|
||||
|
||||
// SafeTensorsMetadata represents metadata extracted from a SafeTensors model.
|
||||
// SafeTensorsModelInfo holds the model details extracted from SafeTensors content.
|
||||
// SafeTensors is a simple, safe serialization format for storing tensors, used
|
||||
// as the default weight format for Hugging Face transformer models. Syft may
|
||||
// populate this struct from three sources:
|
||||
@ -10,7 +10,7 @@ package pkg
|
||||
//
|
||||
// The Model Name, License, and Version fields have all been lifted up to be on
|
||||
// the syft Package.
|
||||
type SafeTensorsMetadata struct {
|
||||
type SafeTensorsModelInfo struct {
|
||||
// Format is the source format label (always "safetensors" for this metadata type).
|
||||
// Present because the Docker AI model config blob carries an explicit format field
|
||||
// that can also be "gguf", and recording it here makes the origin explicit.
|
||||
@ -55,5 +55,5 @@ type SafeTensorsMetadata struct {
|
||||
|
||||
// Parts contains metadata from additional SafeTensors shards or OCI layers that
|
||||
// were merged into this package during post-processing.
|
||||
Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"`
|
||||
Parts []SafeTensorsModelInfo `json:"parts,omitempty" cyclonedx:"parts"`
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user