mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
fix: rename SafeTensorsMetadata -> SafeTensorsModelInfo
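Renames the pkg.SafeTensorsMetadata type to pkg.SafeTensorsModelInfo, together with its JSON type name (safetensors-metadata -> safetensors-model-info) and its JSON schema definition (SafetensorsMetadata -> SafetensorsModelInfo), and drops a leftover merge-conflict marker from the schema changelog comments. Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>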
This commit is contained in:
parent
dbee104681
commit
52653e24fc
@ -11,6 +11,5 @@ const (
|
|||||||
// 16.1.2 - placeholder for 16.1.2 changelog
|
// 16.1.2 - placeholder for 16.1.2 changelog
|
||||||
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
|
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
|
||||||
// 16.1.4 - add BunLockEntry metadata type for bun.lock support
|
// 16.1.4 - add BunLockEntry metadata type for bun.lock support
|
||||||
>>>>>>> c6e286cb (feat: safe tensors)
|
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
@ -27,7 +27,7 @@ var knownNonMetadataTypeNames = strset.New(
|
|||||||
var knownMetadataTypeNames = strset.New(
|
var knownMetadataTypeNames = strset.New(
|
||||||
"DotnetPortableExecutableEntry",
|
"DotnetPortableExecutableEntry",
|
||||||
"GGUFFileHeader",
|
"GGUFFileHeader",
|
||||||
"SafeTensorsMetadata",
|
"SafeTensorsModelInfo",
|
||||||
)
|
)
|
||||||
|
|
||||||
func DiscoverTypeNames() ([]string, error) {
|
func DiscoverTypeNames() ([]string, error) {
|
||||||
|
|||||||
@ -65,7 +65,7 @@ func AllTypes() []any {
|
|||||||
pkg.RubyGemspec{},
|
pkg.RubyGemspec{},
|
||||||
pkg.RustBinaryAuditEntry{},
|
pkg.RustBinaryAuditEntry{},
|
||||||
pkg.RustCargoLockEntry{},
|
pkg.RustCargoLockEntry{},
|
||||||
pkg.SafeTensorsMetadata{},
|
pkg.SafeTensorsModelInfo{},
|
||||||
pkg.SnapEntry{},
|
pkg.SnapEntry{},
|
||||||
pkg.SwiftPackageManagerResolvedEntry{},
|
pkg.SwiftPackageManagerResolvedEntry{},
|
||||||
pkg.SwiplPackEntry{},
|
pkg.SwiplPackEntry{},
|
||||||
|
|||||||
@ -127,7 +127,7 @@ var jsonTypes = makeJSONTypes(
|
|||||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||||
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
|
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"),
|
||||||
jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"),
|
jsonNames(pkg.SafeTensorsModelInfo{}, "safetensors-model-info"),
|
||||||
)
|
)
|
||||||
|
|
||||||
func expandLegacyNameVariants(names ...string) []string {
|
func expandLegacyNameVariants(names ...string) []string {
|
||||||
|
|||||||
@ -2800,7 +2800,7 @@
|
|||||||
"$ref": "#/$defs/RustCargoLockEntry"
|
"$ref": "#/$defs/RustCargoLockEntry"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/$defs/SafetensorsMetadata"
|
"$ref": "#/$defs/SafetensorsModelInfo"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/$defs/SnapEntry"
|
"$ref": "#/$defs/SnapEntry"
|
||||||
@ -4090,7 +4090,7 @@
|
|||||||
],
|
],
|
||||||
"description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information."
|
"description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information."
|
||||||
},
|
},
|
||||||
"SafetensorsMetadata": {
|
"SafetensorsModelInfo": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"format": {
|
"format": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@ -4141,14 +4141,14 @@
|
|||||||
},
|
},
|
||||||
"parts": {
|
"parts": {
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/$defs/SafetensorsMetadata"
|
"$ref": "#/$defs/SafetensorsModelInfo"
|
||||||
},
|
},
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing."
|
"description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model."
|
"description": "SafeTensorsModelInfo holds the model details extracted from SafeTensors content."
|
||||||
},
|
},
|
||||||
"Schema": {
|
"Schema": {
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|||||||
@ -44,3 +44,60 @@ catalogers:
|
|||||||
default: true
|
default: true
|
||||||
evidence:
|
evidence:
|
||||||
- GGUFFileHeader.MetadataKeyValuesHash
|
- GGUFFileHeader.MetadataKeyValuesHash
|
||||||
|
- ecosystem: ai # MANUAL
|
||||||
|
name: safetensors-cataloger # AUTO-GENERATED
|
||||||
|
type: generic # AUTO-GENERATED
|
||||||
|
source: # AUTO-GENERATED
|
||||||
|
file: syft/pkg/cataloger/ai/cataloger.go
|
||||||
|
function: NewSafeTensorsCataloger
|
||||||
|
selectors: # AUTO-GENERATED
|
||||||
|
- ai
|
||||||
|
- directory
|
||||||
|
- image
|
||||||
|
- ml
|
||||||
|
- model
|
||||||
|
- package
|
||||||
|
- safetensors
|
||||||
|
parsers: # AUTO-GENERATED structure
|
||||||
|
- function: parseSafeTensorsIndex
|
||||||
|
detector: # AUTO-GENERATED
|
||||||
|
method: glob # AUTO-GENERATED
|
||||||
|
criteria: # AUTO-GENERATED
|
||||||
|
- '**/*.safetensors.index.json'
|
||||||
|
capabilities: # MANUAL - preserved across regeneration
|
||||||
|
- name: license
|
||||||
|
default: true
|
||||||
|
- name: dependency.depth
|
||||||
|
default: []
|
||||||
|
- name: dependency.edges
|
||||||
|
default: ""
|
||||||
|
- name: dependency.kinds
|
||||||
|
default: []
|
||||||
|
- name: package_manager.files.listing
|
||||||
|
default: false
|
||||||
|
- name: package_manager.files.digests
|
||||||
|
default: false
|
||||||
|
- name: package_manager.package_integrity_hash
|
||||||
|
default: false
|
||||||
|
- function: parseSafeTensorsFile
|
||||||
|
detector: # AUTO-GENERATED
|
||||||
|
method: glob # AUTO-GENERATED
|
||||||
|
criteria: # AUTO-GENERATED
|
||||||
|
- '**/*.safetensors'
|
||||||
|
capabilities: # MANUAL - preserved across regeneration
|
||||||
|
- name: license
|
||||||
|
default: true
|
||||||
|
- name: dependency.depth
|
||||||
|
default: []
|
||||||
|
- name: dependency.edges
|
||||||
|
default: ""
|
||||||
|
- name: dependency.kinds
|
||||||
|
default: []
|
||||||
|
- name: package_manager.files.listing
|
||||||
|
default: false
|
||||||
|
- name: package_manager.files.digests
|
||||||
|
default: false
|
||||||
|
- name: package_manager.package_integrity_hash
|
||||||
|
default: true
|
||||||
|
evidence:
|
||||||
|
- SafeTensorsModelInfo.MetadataHash
|
||||||
|
|||||||
@ -21,7 +21,7 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st
|
|||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package {
|
func newSafeTensorsPackage(metadata *pkg.SafeTensorsModelInfo, modelName, version, license string, locations ...file.Location) pkg.Package {
|
||||||
p := pkg.Package{
|
p := pkg.Package{
|
||||||
Name: modelName,
|
Name: modelName,
|
||||||
Version: version,
|
Version: version,
|
||||||
|
|||||||
@ -32,7 +32,7 @@ func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic.
|
|||||||
return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err)
|
return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
md := pkg.SafeTensorsMetadata{
|
md := pkg.SafeTensorsModelInfo{
|
||||||
Format: "safetensors",
|
Format: "safetensors",
|
||||||
TensorCount: uint64(len(header.tensors)),
|
TensorCount: uint64(len(header.tensors)),
|
||||||
Quantization: normalizeDType(header.dominantDType()),
|
Quantization: normalizeDType(header.dominantDType()),
|
||||||
@ -82,7 +82,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
|
|||||||
shards[shard] = struct{}{}
|
shards[shard] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
md := pkg.SafeTensorsMetadata{
|
md := pkg.SafeTensorsModelInfo{
|
||||||
Format: "safetensors",
|
Format: "safetensors",
|
||||||
TensorCount: uint64(len(doc.WeightMap)),
|
TensorCount: uint64(len(doc.WeightMap)),
|
||||||
ShardCount: len(shards),
|
ShardCount: len(shards),
|
||||||
@ -111,7 +111,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic
|
|||||||
// safetensors artifact and folds their values into the metadata struct. It
|
// safetensors artifact and folds their values into the metadata struct. It
|
||||||
// returns a name and license derived from those sources, with the caller free
|
// returns a name and license derived from those sources, with the caller free
|
||||||
// to fall back to a filename-derived default.
|
// to fall back to a filename-derived default.
|
||||||
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, license string) {
|
func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsModelInfo) (name, license string) {
|
||||||
if resolver == nil {
|
if resolver == nil {
|
||||||
return "", ""
|
return "", ""
|
||||||
}
|
}
|
||||||
|
|||||||
@ -61,7 +61,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
|
|||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
md := pkg.SafeTensorsMetadata{
|
md := pkg.SafeTensorsModelInfo{
|
||||||
Format: "safetensors",
|
Format: "safetensors",
|
||||||
Quantization: cfg.Config.Quantization,
|
Quantization: cfg.Config.Quantization,
|
||||||
Parameters: cfg.Config.Parameters,
|
Parameters: cfg.Config.Parameters,
|
||||||
@ -89,7 +89,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen
|
|||||||
// YAML frontmatter with license + base_model; HF config.json carries
|
// YAML frontmatter with license + base_model; HF config.json carries
|
||||||
// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license
|
// architectures/torch_dtype/transformers_version; the vnd.docker.ai.license
|
||||||
// blob is plain license text.
|
// blob is plain license text.
|
||||||
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) {
|
func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (name, license string) {
|
||||||
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
||||||
if !ok {
|
if !ok {
|
||||||
return "", ""
|
return "", ""
|
||||||
@ -113,7 +113,7 @@ func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadat
|
|||||||
// readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and
|
// readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and
|
||||||
// passes its contents to classifyAndMerge. Split out from the calling loop so
|
// passes its contents to classifyAndMerge. Split out from the calling loop so
|
||||||
// the resolver handle is closed via defer on every iteration.
|
// the resolver handle is closed via defer on every iteration.
|
||||||
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsMetadata, name, license *string) {
|
func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, name, license *string) {
|
||||||
rc, err := resolver.FileContentsByLocation(loc)
|
rc, err := resolver.FileContentsByLocation(loc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
@ -130,7 +130,7 @@ func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md
|
|||||||
// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md,
|
// classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md,
|
||||||
// config.json, generation_config.json, tokenizer.json, etc.) and folds useful
|
// config.json, generation_config.json, tokenizer.json, etc.) and folds useful
|
||||||
// fields into the metadata struct and out-parameters.
|
// fields into the metadata struct and out-parameters.
|
||||||
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) {
|
func classifyAndMerge(buf []byte, md *pkg.SafeTensorsModelInfo, name, license *string) {
|
||||||
trimmed := trimLeadingWhitespace(buf)
|
trimmed := trimLeadingWhitespace(buf)
|
||||||
switch {
|
switch {
|
||||||
case hasPrefix(trimmed, "---"):
|
case hasPrefix(trimmed, "---"):
|
||||||
|
|||||||
@ -72,13 +72,13 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
|
|||||||
}
|
}
|
||||||
|
|
||||||
var namedPkgs []pkg.Package
|
var namedPkgs []pkg.Package
|
||||||
var namelessParts []pkg.SafeTensorsMetadata
|
var namelessParts []pkg.SafeTensorsModelInfo
|
||||||
for _, p := range pkgs {
|
for _, p := range pkgs {
|
||||||
if p.Name != "" {
|
if p.Name != "" {
|
||||||
namedPkgs = append(namedPkgs, p)
|
namedPkgs = append(namedPkgs, p)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok {
|
if md, ok := p.Metadata.(pkg.SafeTensorsModelInfo); ok {
|
||||||
md.MetadataHash = ""
|
md.MetadataHash = ""
|
||||||
namelessParts = append(namelessParts, md)
|
namelessParts = append(namelessParts, md)
|
||||||
}
|
}
|
||||||
@ -90,7 +90,7 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship,
|
|||||||
|
|
||||||
if len(namedPkgs) == 1 && len(namelessParts) > 0 {
|
if len(namedPkgs) == 1 && len(namelessParts) > 0 {
|
||||||
winner := &namedPkgs[0]
|
winner := &namedPkgs[0]
|
||||||
if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok {
|
if md, ok := winner.Metadata.(pkg.SafeTensorsModelInfo); ok {
|
||||||
md.Parts = namelessParts
|
md.Parts = namelessParts
|
||||||
winner.Metadata = md
|
winner.Metadata = md
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
package pkg
|
package pkg
|
||||||
|
|
||||||
// SafeTensorsMetadata represents metadata extracted from a SafeTensors model.
|
// SafeTensorsModelInfo holds the model details extracted from SafeTensors content.
|
||||||
// SafeTensors is a simple, safe serialization format for storing tensors, used
|
// SafeTensors is a simple, safe serialization format for storing tensors, used
|
||||||
// as the default weight format for Hugging Face transformer models. Syft may
|
// as the default weight format for Hugging Face transformer models. Syft may
|
||||||
// populate this struct from three sources:
|
// populate this struct from three sources:
|
||||||
@ -10,7 +10,7 @@ package pkg
|
|||||||
//
|
//
|
||||||
// The Model Name, License, and Version fields have all been lifted up to be on
|
// The Model Name, License, and Version fields have all been lifted up to be on
|
||||||
// the syft Package.
|
// the syft Package.
|
||||||
type SafeTensorsMetadata struct {
|
type SafeTensorsModelInfo struct {
|
||||||
// Format is the source format label (always "safetensors" for this metadata type).
|
// Format is the source format label (always "safetensors" for this metadata type).
|
||||||
// Present because the Docker AI model config blob carries an explicit format field
|
// Present because the Docker AI model config blob carries an explicit format field
|
||||||
// that can also be "gguf", and recording it here makes the origin explicit.
|
// that can also be "gguf", and recording it here makes the origin explicit.
|
||||||
@ -55,5 +55,5 @@ type SafeTensorsMetadata struct {
|
|||||||
|
|
||||||
// Parts contains metadata from additional SafeTensors shards or OCI layers that
|
// Parts contains metadata from additional SafeTensors shards or OCI layers that
|
||||||
// were merged into this package during post-processing.
|
// were merged into this package during post-processing.
|
||||||
Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"`
|
Parts []SafeTensorsModelInfo `json:"parts,omitempty" cyclonedx:"parts"`
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user