diff --git a/internal/constants.go b/internal/constants.go index 098e80459..e4b355586 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -11,6 +11,5 @@ const ( // 16.1.2 - placeholder for 16.1.2 changelog // 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata // 16.1.4 - add BunLockEntry metadata type for bun.lock support ->>>>>>> c6e286cb (feat: safe tensors) ) diff --git a/internal/packagemetadata/discover_type_names.go b/internal/packagemetadata/discover_type_names.go index 4787d415a..cdcefa184 100644 --- a/internal/packagemetadata/discover_type_names.go +++ b/internal/packagemetadata/discover_type_names.go @@ -27,7 +27,7 @@ var knownNonMetadataTypeNames = strset.New( var knownMetadataTypeNames = strset.New( "DotnetPortableExecutableEntry", "GGUFFileHeader", - "SafeTensorsMetadata", + "SafeTensorsModelInfo", ) func DiscoverTypeNames() ([]string, error) { diff --git a/internal/packagemetadata/generated.go b/internal/packagemetadata/generated.go index aaf74f36f..f9c9e38ea 100644 --- a/internal/packagemetadata/generated.go +++ b/internal/packagemetadata/generated.go @@ -65,7 +65,7 @@ func AllTypes() []any { pkg.RubyGemspec{}, pkg.RustBinaryAuditEntry{}, pkg.RustCargoLockEntry{}, - pkg.SafeTensorsMetadata{}, + pkg.SafeTensorsModelInfo{}, pkg.SnapEntry{}, pkg.SwiftPackageManagerResolvedEntry{}, pkg.SwiplPackEntry{}, diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index fcb18318a..3b68fc2b6 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -127,7 +127,7 @@ var jsonTypes = makeJSONTypes( jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"), jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"), jsonNames(pkg.GGUFFileHeader{}, "gguf-file-header"), - jsonNames(pkg.SafeTensorsMetadata{}, "safetensors-metadata"), + jsonNames(pkg.SafeTensorsModelInfo{}, "safetensors-model-info"), ) func expandLegacyNameVariants(names ...string) []string { diff --git a/schema/json/schema-latest.json b/schema/json/schema-latest.json index bfdd4407d..8102a8f1d 100644 --- a/schema/json/schema-latest.json +++ b/schema/json/schema-latest.json @@ -2800,7 +2800,7 @@ "$ref": "#/$defs/RustCargoLockEntry" }, { - "$ref": "#/$defs/SafetensorsMetadata" + "$ref": "#/$defs/SafetensorsModelInfo" }, { "$ref": "#/$defs/SnapEntry" @@ -4090,7 +4090,7 @@ ], "description": "RustCargoLockEntry represents a locked dependency from a Cargo.lock file with precise version and checksum information." }, - "SafetensorsMetadata": { + "SafetensorsModelInfo": { "properties": { "format": { "type": "string", @@ -4141,14 +4141,14 @@ }, "parts": { "items": { - "$ref": "#/$defs/SafetensorsMetadata" + "$ref": "#/$defs/SafetensorsModelInfo" }, "type": "array", "description": "Parts contains metadata from additional SafeTensors shards or OCI layers that\nwere merged into this package during post-processing." } }, "type": "object", - "description": "SafeTensorsMetadata represents metadata extracted from a SafeTensors model." + "description": "SafeTensorsModelInfo holds the model details extracted from SafeTensors content." }, "Schema": { "properties": { diff --git a/syft/pkg/cataloger/ai/capabilities.yaml b/syft/pkg/cataloger/ai/capabilities.yaml index fff9fa3a7..5c58a56a0 100644 --- a/syft/pkg/cataloger/ai/capabilities.yaml +++ b/syft/pkg/cataloger/ai/capabilities.yaml @@ -44,3 +44,60 @@ catalogers: default: true evidence: - GGUFFileHeader.MetadataKeyValuesHash + - ecosystem: ai # MANUAL + name: safetensors-cataloger # AUTO-GENERATED + type: generic # AUTO-GENERATED + source: # AUTO-GENERATED + file: syft/pkg/cataloger/ai/cataloger.go + function: NewSafeTensorsCataloger + selectors: # AUTO-GENERATED + - ai + - directory + - image + - ml + - model + - package + - safetensors + parsers: # AUTO-GENERATED structure + - function: parseSafeTensorsIndex + detector: # AUTO-GENERATED + method: glob # AUTO-GENERATED + criteria: # AUTO-GENERATED + - '**/*.safetensors.index.json' + capabilities: # MANUAL - preserved across regeneration + - name: license + default: true + - name: dependency.depth + default: [] + - name: dependency.edges + default: "" + - name: dependency.kinds + default: [] + - name: package_manager.files.listing + default: false + - name: package_manager.files.digests + default: false + - name: package_manager.package_integrity_hash + default: false + - function: parseSafeTensorsFile + detector: # AUTO-GENERATED + method: glob # AUTO-GENERATED + criteria: # AUTO-GENERATED + - '**/*.safetensors' + capabilities: # MANUAL - preserved across regeneration + - name: license + default: true + - name: dependency.depth + default: [] + - name: dependency.edges + default: "" + - name: dependency.kinds + default: [] + - name: package_manager.files.listing + default: false + - name: package_manager.files.digests + default: false + - name: package_manager.package_integrity_hash + default: true + evidence: + - SafeTensorsModelInfo.MetadataHash diff --git a/syft/pkg/cataloger/ai/package.go b/syft/pkg/cataloger/ai/package.go index 32280765d..b6043933b 100644 --- a/syft/pkg/cataloger/ai/package.go +++ b/syft/pkg/cataloger/ai/package.go @@ -21,7 +21,7 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, modelName, version, license st return p } -func newSafeTensorsPackage(metadata *pkg.SafeTensorsMetadata, modelName, version, license string, locations ...file.Location) pkg.Package { +func newSafeTensorsPackage(metadata *pkg.SafeTensorsModelInfo, modelName, version, license string, locations ...file.Location) pkg.Package { p := pkg.Package{ Name: modelName, Version: version, diff --git a/syft/pkg/cataloger/ai/parse_safetensors_model.go b/syft/pkg/cataloger/ai/parse_safetensors_model.go index c9231ff87..1a7b81fe0 100644 --- a/syft/pkg/cataloger/ai/parse_safetensors_model.go +++ b/syft/pkg/cataloger/ai/parse_safetensors_model.go @@ -32,7 +32,7 @@ func parseSafeTensorsFile(_ context.Context, resolver file.Resolver, _ *generic. return nil, nil, fmt.Errorf("failed to read safetensors header: %w", err) } - md := pkg.SafeTensorsMetadata{ + md := pkg.SafeTensorsModelInfo{ Format: "safetensors", TensorCount: uint64(len(header.tensors)), Quantization: normalizeDType(header.dominantDType()), @@ -82,7 +82,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic shards[shard] = struct{}{} } - md := pkg.SafeTensorsMetadata{ + md := pkg.SafeTensorsModelInfo{ Format: "safetensors", TensorCount: uint64(len(doc.WeightMap)), ShardCount: len(shards), @@ -111,7 +111,7 @@ func parseSafeTensorsIndex(_ context.Context, resolver file.Resolver, _ *generic // safetensors artifact and folds their values into the metadata struct. It // returns a name and license derived from those sources, with the caller free // to fall back to a filename-derived default. -func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsMetadata) (name, license string) { +func enrichFromSiblings(resolver file.Resolver, sourcePath string, md *pkg.SafeTensorsModelInfo) (name, license string) { if resolver == nil { return "", "" } diff --git a/syft/pkg/cataloger/ai/parse_safetensors_oci.go b/syft/pkg/cataloger/ai/parse_safetensors_oci.go index 6f72fd75e..5da4ce92d 100644 --- a/syft/pkg/cataloger/ai/parse_safetensors_oci.go +++ b/syft/pkg/cataloger/ai/parse_safetensors_oci.go @@ -61,7 +61,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen return nil, nil, nil } - md := pkg.SafeTensorsMetadata{ + md := pkg.SafeTensorsModelInfo{ Format: "safetensors", Quantization: cfg.Config.Quantization, Parameters: cfg.Config.Parameters, @@ -89,7 +89,7 @@ func parseSafeTensorsOCIConfig(_ context.Context, resolver file.Resolver, _ *gen // YAML frontmatter with license + base_model; HF config.json carries // architectures/torch_dtype/transformers_version; the vnd.docker.ai.license // blob is plain license text. -func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadata) (name, license string) { +func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (name, license string) { ociResolver, ok := resolver.(file.OCIMediaTypeResolver) if !ok { return "", "" @@ -113,7 +113,7 @@ func enrichFromDockerAILayers(resolver file.Resolver, md *pkg.SafeTensorsMetadat // readAndClassifyDockerAILayer fetches a single Docker AI model-file layer and // passes its contents to classifyAndMerge. Split out from the calling loop so // the resolver handle is closed via defer on every iteration. -func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsMetadata, name, license *string) { +func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, name, license *string) { rc, err := resolver.FileContentsByLocation(loc) if err != nil { return @@ -130,7 +130,7 @@ func readAndClassifyDockerAILayer(resolver file.Resolver, loc file.Location, md // classifyAndMerge sniffs a vnd.docker.ai.model.file blob (which can be README.md, // config.json, generation_config.json, tokenizer.json, etc.) and folds useful // fields into the metadata struct and out-parameters. -func classifyAndMerge(buf []byte, md *pkg.SafeTensorsMetadata, name, license *string) { +func classifyAndMerge(buf []byte, md *pkg.SafeTensorsModelInfo, name, license *string) { trimmed := trimLeadingWhitespace(buf) switch { case hasPrefix(trimmed, "---"): diff --git a/syft/pkg/cataloger/ai/processor.go b/syft/pkg/cataloger/ai/processor.go index aecf82625..a0c56e02d 100644 --- a/syft/pkg/cataloger/ai/processor.go +++ b/syft/pkg/cataloger/ai/processor.go @@ -72,13 +72,13 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, } var namedPkgs []pkg.Package - var namelessParts []pkg.SafeTensorsMetadata + var namelessParts []pkg.SafeTensorsModelInfo for _, p := range pkgs { if p.Name != "" { namedPkgs = append(namedPkgs, p) continue } - if md, ok := p.Metadata.(pkg.SafeTensorsMetadata); ok { + if md, ok := p.Metadata.(pkg.SafeTensorsModelInfo); ok { md.MetadataHash = "" namelessParts = append(namelessParts, md) } @@ -90,7 +90,7 @@ func safeTensorsMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, if len(namedPkgs) == 1 && len(namelessParts) > 0 { winner := &namedPkgs[0] - if md, ok := winner.Metadata.(pkg.SafeTensorsMetadata); ok { + if md, ok := winner.Metadata.(pkg.SafeTensorsModelInfo); ok { md.Parts = namelessParts winner.Metadata = md } diff --git a/syft/pkg/safetensors.go b/syft/pkg/safetensors.go index c89da1a5d..f1fae79f9 100644 --- a/syft/pkg/safetensors.go +++ b/syft/pkg/safetensors.go @@ -1,6 +1,6 @@ package pkg -// SafeTensorsMetadata represents metadata extracted from a SafeTensors model. +// SafeTensorsModelInfo holds the model details extracted from SafeTensors content. // SafeTensors is a simple, safe serialization format for storing tensors, used // as the default weight format for Hugging Face transformer models. Syft may // populate this struct from three sources: @@ -10,7 +10,7 @@ package pkg // // The Model Name, License, and Version fields have all been lifted up to be on // the syft Package. -type SafeTensorsMetadata struct { +type SafeTensorsModelInfo struct { // Format is the source format label (always "safetensors" for this metadata type). // Present because the Docker AI model config blob carries an explicit format field // that can also be "gguf", and recording it here makes the origin explicit. @@ -55,5 +55,5 @@ type SafeTensorsMetadata struct { // Parts contains metadata from additional SafeTensors shards or OCI layers that // were merged into this package during post-processing. - Parts []SafeTensorsMetadata `json:"parts,omitempty" cyclonedx:"parts"` + Parts []SafeTensorsModelInfo `json:"parts,omitempty" cyclonedx:"parts"` }