mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
chore: refactor interface with new naming
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
b234d3eb2f
commit
e88d6d019e
@ -14,6 +14,7 @@ import (
|
||||
|
||||
var _ Resolver = (*MockResolver)(nil)
|
||||
var _ OCIMediaTypeResolver = (*MockResolver)(nil)
|
||||
var _ OCIArtifactResolver = (*MockResolver)(nil)
|
||||
|
||||
// MockResolver implements the Resolver interface and is intended for use *only in test code*.
|
||||
// It provides an implementation that can resolve local filesystem paths using only a provided discrete list of file
|
||||
@ -25,6 +26,7 @@ type MockResolver struct {
|
||||
mediaTypeIndex map[string][]Location
|
||||
extension map[string][]Location
|
||||
basename map[string][]Location
|
||||
ociRef string
|
||||
}
|
||||
|
||||
// NewMockResolverForPaths creates a new MockResolver, where the only resolvable
|
||||
@ -74,6 +76,16 @@ func NewMockResolverForPathsWithMetadata(metadata map[Coordinates]Metadata) *Moc
|
||||
}
|
||||
}
|
||||
|
||||
// NewMockResolverForOCIArtifact creates a MockResolver that can resolve files
|
||||
// by media type AND surfaces the given OCI ref via the OCIArtifactResolver
|
||||
// interface. Intended for tests that exercise the catalogers' OCI-artifact-aware
|
||||
// naming code paths.
|
||||
func NewMockResolverForOCIArtifact(ref string, mediaTypes map[string][]Location) *MockResolver {
|
||||
r := NewMockResolverForMediaTypes(mediaTypes)
|
||||
r.ociRef = ref
|
||||
return r
|
||||
}
|
||||
|
||||
// NewMockResolverForMediaTypes creates a MockResolver that can resolve files by media type.
|
||||
// The mediaTypes map specifies which locations should be returned for each media type.
|
||||
func NewMockResolverForMediaTypes(mediaTypes map[string][]Location) *MockResolver {
|
||||
@ -102,6 +114,11 @@ func NewMockResolverForMediaTypes(mediaTypes map[string][]Location) *MockResolve
|
||||
}
|
||||
}
|
||||
|
||||
// ImageReference returns the image reference stored on this mock (the ociRef
// field, which NewMockResolverForOCIArtifact sets). It is "" when no
// reference was ever recorded.
|
||||
func (r MockResolver) ImageReference() string {
|
||||
return r.ociRef
|
||||
}
|
||||
|
||||
// HasPath indicates if the given path exists in the underlying source.
|
||||
func (r MockResolver) HasPath(path string) bool {
|
||||
for _, l := range r.locations {
|
||||
|
||||
@ -63,6 +63,17 @@ type OCIMediaTypeResolver interface {
|
||||
FilesByMediaType(types ...string) ([]Location, error)
|
||||
}
|
||||
|
||||
// OCIArtifactResolver exposes the user-supplied OCI image reference to
|
||||
// catalogers. Catalogers can type-assert a Resolver to this interface when they
|
||||
// need a naming or context hint that the layer contents alone don't carry — for
|
||||
// example, when a repacked AI model artifact has stripped name fields out of its
|
||||
// config.json and the only remaining identifier is the image reference itself.
|
||||
type OCIArtifactResolver interface {
|
||||
// ImageReference returns the image reference the artifact was fetched with, e.g.
|
||||
// "docker.io/ai/smollm2-vllm:360M". Returns "" when not known.
// NOTE(review): the result is a plain string; splitting it into registry,
// repository and tag appears to be left to the caller — confirm.
|
||||
ImageReference() string
|
||||
}
|
||||
|
||||
// LocationResolver provides iteration over all file locations in a source.
|
||||
type LocationResolver interface {
|
||||
// AllLocations returns a channel of all file references from the underlying source.
|
||||
|
||||
@ -12,6 +12,7 @@ import (
|
||||
|
||||
var _ file.Resolver = (*ContainerImageModel)(nil)
|
||||
var _ file.OCIMediaTypeResolver = (*ContainerImageModel)(nil)
|
||||
var _ file.OCIArtifactResolver = (*ContainerImageModel)(nil)
|
||||
|
||||
// LayerInfo holds information about an OCI model layer file stored on disk.
|
||||
type LayerInfo struct {
|
||||
@ -26,10 +27,14 @@ type ContainerImageModel struct {
|
||||
tempDir string // temp directory containing all layer files
|
||||
layerFiles map[string]LayerInfo // digest -> layer info (temp path + media type)
|
||||
locations map[string]file.Location // digest -> location
|
||||
ref string // image reference the artifact was fetched with
|
||||
}
|
||||
|
||||
// NewContainerImageModel creates a new resolver with the given temp directory and layer files.
|
||||
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo) *ContainerImageModel {
|
||||
// NewContainerImageModel creates a new resolver with the given temp directory
|
||||
// and layer files. The ref is surfaced through the file.OCIArtifactResolver
|
||||
// interface so catalogers can derive context-level naming hints from the
|
||||
// artifact reference when the layer contents don't carry one.
|
||||
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo, ref string) *ContainerImageModel {
|
||||
// Create locations for all layer files
|
||||
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"
|
||||
locations := make(map[string]file.Location, len(layerFiles))
|
||||
@ -43,9 +48,15 @@ func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo) *Co
|
||||
tempDir: tempDir,
|
||||
layerFiles: layerFiles,
|
||||
locations: locations,
|
||||
ref: ref,
|
||||
}
|
||||
}
|
||||
|
||||
// ImageReference returns the image reference the artifact was fetched with,
// i.e. the ref value stored on the resolver by NewContainerImageModel. The
// string is returned as stored, without any parsing.
|
||||
func (r *ContainerImageModel) ImageReference() string {
|
||||
return r.ref
|
||||
}
|
||||
|
||||
// FilesByMediaType returns locations for layers matching the given media type patterns.
|
||||
// Patterns support glob-style matching (e.g., "application/vnd.docker.ai*").
|
||||
func (r *ContainerImageModel) FilesByMediaType(types ...string) ([]file.Location, error) {
|
||||
|
||||
@ -60,7 +60,7 @@ func TestOCIModelResolver_FilesByMediaType(t *testing.T) {
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
resolver := NewContainerImageModel(tempDir, test.layerFiles)
|
||||
resolver := NewContainerImageModel(tempDir, test.layerFiles, "")
|
||||
|
||||
locations, err := resolver.FilesByMediaType(test.patterns...)
|
||||
require.NoError(t, err)
|
||||
@ -81,7 +81,7 @@ func TestOCIModelResolver_FileContentsByLocation(t *testing.T) {
|
||||
digest: {TempPath: tempFile, MediaType: ggufLayerMediaType},
|
||||
}
|
||||
|
||||
resolver := NewContainerImageModel(tempDir, layerFiles)
|
||||
resolver := NewContainerImageModel(tempDir, layerFiles, "")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@ -147,10 +147,10 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
t.Run("dir scan: parent-dir fallback names a bare safetensors with no siblings", func(t *testing.T) {
|
||||
// case #1: model.safetensors in /models/tiny-llama/ with no config.json
|
||||
// or README. The processor cannot derive a producer name and Architecture
|
||||
// is empty, so it lands on the parent-dir rung.
|
||||
t.Run("dir scan: dropped when no sibling config.json carries _name_or_path", func(t *testing.T) {
|
||||
// Without a config.json the dir-scan path has no name source. There is
|
||||
// intentionally no parent-dir fallback (or any opaque fallback), so the
|
||||
// group is dropped rather than named after the filesystem layout.
|
||||
p := dirPkg("/models/tiny-llama/weights.safetensors", pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: 4,
|
||||
@ -160,30 +160,15 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
|
||||
resolver := file.NewMockResolverForPaths() // no config.json / README available
|
||||
out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, nil)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "tiny-llama", out[0].Name)
|
||||
assert.Empty(t, out, "dir-scan group with no config.json must be dropped")
|
||||
})
|
||||
|
||||
t.Run("dir scan: parent-dir fallback rescues a metadata-only header", func(t *testing.T) {
|
||||
// case #3: header carries only __metadata__, no tensors. Parameters and
|
||||
// Architecture are both empty, so Arch-Parameters can't fire either —
|
||||
// the parent-dir fallback is the only thing that names the package.
|
||||
p := dirPkg("/scan/edge/headeronly/model.safetensors", pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
MetadataHash: "xyz",
|
||||
UserMetadata: pkg.KeyValues{{Key: "producer", Value: "stgen"}},
|
||||
})
|
||||
resolver := file.NewMockResolverForPaths()
|
||||
out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, nil)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "headeronly", out[0].Name)
|
||||
})
|
||||
|
||||
t.Run("dir scan: Architecture-Parameters synthetic wins over parent-dir", func(t *testing.T) {
|
||||
// Architecture and Parameters are both populated → synthetic wins over
|
||||
// the parent-dir fallback. _name_or_path is not available (no sibling
|
||||
// config.json mock).
|
||||
t.Run("dir scan: Architecture-Parameters alone does not name the package", func(t *testing.T) {
|
||||
// Even with rich content-derived metadata (Architecture + Parameters),
|
||||
// the package must be dropped when there is no producer-declared name.
|
||||
// The Arch-Params synthetic rung was removed because it produced labels
|
||||
// like "LlamaForCausalLM-2.68B" that SBOM consumers couldn't trace back
|
||||
// to a recognizable model.
|
||||
p := dirPkg("/models/tiny/weights.safetensors", pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
Architecture: "LlamaForCausalLM",
|
||||
@ -194,15 +179,14 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
|
||||
resolver := file.NewMockResolverForPaths()
|
||||
out, _, err := safeTensorsMergeProcessor(context.Background(), resolver, []pkg.Package{p}, nil, nil)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "LlamaForCausalLM-2.68B", out[0].Name)
|
||||
assert.Empty(t, out, "Arch-Params alone is not a name source")
|
||||
})
|
||||
|
||||
t.Run("OCI: dropped when no name source is available", func(t *testing.T) {
|
||||
// The vllm-style shape: config-blob package + a weight-layer package,
|
||||
// both at virtual path "/", no model.file companions on the resolver.
|
||||
// With nothing to derive a name from, the group is dropped (no opaque
|
||||
// fallback / no parent-dir option for OCI).
|
||||
// both at virtual path "/", no model.file companions on the resolver
|
||||
// AND no image ref. With nothing to derive a name from, the group is
|
||||
// dropped — no opaque fallback.
|
||||
configMd := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: 5,
|
||||
@ -223,6 +207,57 @@ func TestSafeTensorsMergeProcessor(t *testing.T) {
|
||||
assert.Empty(t, out, "OCI group with no naming source must be dropped")
|
||||
})
|
||||
|
||||
t.Run("OCI: image-ref last segment names the group when config.json is absent", func(t *testing.T) {
|
||||
// vllm-style artifact: a repacked model whose embedded config.json has
|
||||
// been stripped of _name_or_path. The merge processor falls through to
|
||||
// the second rung — the image-reference last segment — so we still emit
|
||||
// a recognizable model name instead of dropping it.
|
||||
configMd := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: 290,
|
||||
TotalSize: "723MB",
|
||||
}
|
||||
shardMd := pkg.SafeTensorsModelInfo{
|
||||
Format: "safetensors",
|
||||
TensorCount: 290,
|
||||
Quantization: "BF16",
|
||||
MetadataHash: "deadbeef",
|
||||
}
|
||||
resolver := file.NewMockResolverForOCIArtifact(
|
||||
"docker.io/ai/smollm2-vllm:360M", nil,
|
||||
)
|
||||
out, _, err := safeTensorsMergeProcessor(
|
||||
context.Background(), resolver,
|
||||
[]pkg.Package{ociPkg(configMd), ociPkg(shardMd)}, nil, nil,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "smollm2-vllm", out[0].Name, "rung 2: image-ref repository basename")
|
||||
})
|
||||
|
||||
t.Run("OCI: config.json _name_or_path beats the image-ref fallback", func(t *testing.T) {
|
||||
// When the embedded config.json carries _name_or_path, rung 1 wins over
|
||||
// the image ref even if both are present.
|
||||
dir := t.TempDir()
|
||||
hfConfigPath := filepath.Join(dir, "config.json")
|
||||
require.NoError(t, os.WriteFile(hfConfigPath,
|
||||
[]byte(`{"_name_or_path":"org/preferred-name"}`), 0o644))
|
||||
resolver := file.NewMockResolverForOCIArtifact(
|
||||
"docker.io/ai/smollm2-vllm:360M",
|
||||
map[string][]file.Location{
|
||||
dockerAIModelFileMediaType: {file.NewLocation(hfConfigPath)},
|
||||
},
|
||||
)
|
||||
configMd := pkg.SafeTensorsModelInfo{Format: "safetensors", TensorCount: 1}
|
||||
out, _, err := safeTensorsMergeProcessor(
|
||||
context.Background(), resolver,
|
||||
[]pkg.Package{ociPkg(configMd)}, nil, nil,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, out, 1)
|
||||
assert.Equal(t, "preferred-name", out[0].Name, "rung 1 (config.json) wins over rung 2 (image ref)")
|
||||
})
|
||||
|
||||
t.Run("OCI: merges config + shard and names from companion config.json", func(t *testing.T) {
|
||||
// Write a single model.file companion blob containing HF config.json so
|
||||
// the processor can derive _name_or_path and Architecture from it.
|
||||
@ -348,118 +383,56 @@ spdx-id: Apache-2.0
|
||||
})
|
||||
}
|
||||
|
||||
// TestSafeTensorsNamingPrecedence codifies pickSafeTensorsName's documented
|
||||
// precedence chain. Each case sets exactly the inputs that should activate one
|
||||
// rung and asserts the expected outcome — including the drop case when every
|
||||
// rung is unavailable.
|
||||
// TestSafeTensorsNamingPrecedence codifies pickSafeTensorsName's two-rung
|
||||
// precedence chain. Each case sets the inputs that should activate one rung
|
||||
// (or neither, asserting the drop path).
|
||||
//
|
||||
// Precedence (highest → lowest):
|
||||
// 1. config.json _name_or_path (path.Base applied)
|
||||
// 2. OCI manifest title (follow-up; covered today by an empty-string input)
|
||||
// 3. Architecture + Parameters (both must be non-empty)
|
||||
// 4. parent directory (dir-scan only; OCI groups skip this rung)
|
||||
// 1. config.json _name_or_path (path.Base applied; both dir-scan and OCI)
|
||||
// 2. OCI image-ref last segment (OCI only — empty string for dir scans)
|
||||
// → drop (empty name) when nothing matches
|
||||
func TestSafeTensorsNamingPrecedence(t *testing.T) {
|
||||
const dirGroup = "/scan/parent-name"
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
groupKey string
|
||||
nameOrPath string
|
||||
arch string
|
||||
params string
|
||||
imageRefName string
|
||||
want string
|
||||
}{
|
||||
// rung 1
|
||||
{
|
||||
name: "rung 1: _name_or_path beats Arch+Params and parent-dir",
|
||||
groupKey: dirGroup,
|
||||
name: "rung 1: _name_or_path beats the image-ref fallback",
|
||||
nameOrPath: "org/MyModel",
|
||||
arch: "LlamaForCausalLM",
|
||||
params: "7B",
|
||||
imageRefName: "fallback-ref",
|
||||
want: "MyModel",
|
||||
},
|
||||
{
|
||||
name: "rung 1: applies path.Base to the raw value",
|
||||
groupKey: dirGroup,
|
||||
nameOrPath: "very/deep/checkpoint/path/leaf-model",
|
||||
want: "leaf-model",
|
||||
},
|
||||
{
|
||||
name: "rung 1: works for OCI groups too",
|
||||
groupKey: ociGroupKey,
|
||||
nameOrPath: "org/OciModel",
|
||||
name: "rung 1: bare name without slashes is preserved",
|
||||
nameOrPath: "OciModel",
|
||||
want: "OciModel",
|
||||
},
|
||||
|
||||
// rung 3
|
||||
// rung 2
|
||||
{
|
||||
name: "rung 3: Arch+Params wins when no _name_or_path",
|
||||
groupKey: dirGroup,
|
||||
arch: "LlamaForCausalLM",
|
||||
params: "7B",
|
||||
want: "LlamaForCausalLM-7B",
|
||||
},
|
||||
{
|
||||
name: "rung 3: works for OCI groups (the only non-drop rung when no manifest title)",
|
||||
groupKey: ociGroupKey,
|
||||
arch: "Qwen3ForCausalLM",
|
||||
params: "2.66B",
|
||||
want: "Qwen3ForCausalLM-2.66B",
|
||||
},
|
||||
{
|
||||
name: "rung 3 NOT taken when only Architecture is set: falls through to parent-dir",
|
||||
groupKey: dirGroup,
|
||||
arch: "LlamaForCausalLM",
|
||||
want: "parent-name",
|
||||
},
|
||||
{
|
||||
name: "rung 3 NOT taken when only Parameters is set: falls through to parent-dir",
|
||||
groupKey: dirGroup,
|
||||
params: "7B",
|
||||
want: "parent-name",
|
||||
},
|
||||
|
||||
// rung 4
|
||||
{
|
||||
name: "rung 4: parent-dir when no other rung populated",
|
||||
groupKey: dirGroup,
|
||||
want: "parent-name",
|
||||
},
|
||||
{
|
||||
name: "rung 4 skipped for OCI groups: no usable parent path",
|
||||
groupKey: ociGroupKey,
|
||||
want: "",
|
||||
name: "rung 2: image-ref last segment used when _name_or_path is empty",
|
||||
imageRefName: "smollm2-vllm",
|
||||
want: "smollm2-vllm",
|
||||
},
|
||||
|
||||
// drops
|
||||
{
|
||||
name: "drop: dir group at filesystem root",
|
||||
groupKey: "/",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "drop: dir group with empty parent",
|
||||
groupKey: ".",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "drop: OCI group with nothing",
|
||||
groupKey: ociGroupKey,
|
||||
name: "drop: both rungs empty",
|
||||
want: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
merged := pkg.Package{
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.SafeTensorsModelInfo{
|
||||
Architecture: tc.arch,
|
||||
Parameters: tc.params,
|
||||
},
|
||||
}
|
||||
got := pickSafeTensorsName(merged, tc.groupKey, tc.nameOrPath)
|
||||
got := pickSafeTensorsName(tc.nameOrPath, tc.imageRefName)
|
||||
assert.Equal(t, tc.want, got)
|
||||
})
|
||||
}
|
||||
|
||||
@ -11,6 +11,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
gcrname "github.com/google/go-containerregistry/pkg/name"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
@ -87,10 +88,13 @@ func ggufMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err er
|
||||
// sibling config.json + README.md for dir scans, the model-file companion
|
||||
// layers + license layer for OCI — and attaches those locations as
|
||||
// supporting evidence;
|
||||
// 4. picks a name via the precedence chain
|
||||
// config.json _name_or_path → Architecture-Parameters → parent-dir
|
||||
// and drops the group when none of those produced a name (no opaque
|
||||
// fallback / no MetadataHash-as-name).
|
||||
// 4. picks a name via a two-rung precedence chain (see pickSafeTensorsName):
|
||||
// config.json _name_or_path first (both sources), then the OCI image-ref
|
||||
// last segment (OCI only). Drops the group when neither rung produces a
|
||||
// name. There is no opaque fallback (no Architecture-Parameters synthetic,
|
||||
// no parent-dir, no MetadataHash-as-name) — an unnameable model is
|
||||
// intentionally absent from the SBOM rather than recorded under a
|
||||
// misleading label.
|
||||
func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
if err != nil || len(pkgs) == 0 {
|
||||
return pkgs, rels, err
|
||||
@ -123,8 +127,8 @@ func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs
|
||||
out := other
|
||||
for _, key := range keys {
|
||||
merged := mergeSafeTensorsGroup(groups[key])
|
||||
nameOrPath := enrichSafeTensorsGroup(ctx, resolver, key, &merged)
|
||||
name := pickSafeTensorsName(merged, key, nameOrPath)
|
||||
nameOrPath, imageRefName := enrichSafeTensorsGroup(ctx, resolver, key, &merged)
|
||||
name := pickSafeTensorsName(nameOrPath, imageRefName)
|
||||
if name == "" {
|
||||
continue // drop unnameable groups, per design (no opaque fallback)
|
||||
}
|
||||
@ -331,9 +335,11 @@ func rollupHash(hashes []string) string {
|
||||
// enrichSafeTensorsGroup reads the resolver once for the group to populate the
|
||||
// merged metadata's Architecture / TorchDtype / TransformersVersion, set the
|
||||
// licenses on the merged package, and attach the location of every consulted
|
||||
// supporting file as SupportingEvidence. Returns the raw _name_or_path so the
|
||||
// caller can apply path.Base in its naming step.
|
||||
func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKey string, merged *pkg.Package) (nameOrPath string) {
|
||||
// supporting file as SupportingEvidence. Returns two name candidates for the
|
||||
// merge processor: nameOrPath (raw _name_or_path from a config.json) and
|
||||
// imageRefName (the last path segment of the OCI image reference, empty for
|
||||
// dir-scan groups).
|
||||
func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKey string, merged *pkg.Package) (nameOrPath, imageRefName string) {
|
||||
md := merged.Metadata.(pkg.SafeTensorsModelInfo)
|
||||
|
||||
var (
|
||||
@ -341,7 +347,7 @@ func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKe
|
||||
supporting []file.Location
|
||||
)
|
||||
if groupKey == ociGroupKey {
|
||||
nameOrPath, lics, supporting = enrichSafeTensorsOCI(ctx, resolver, &md)
|
||||
nameOrPath, imageRefName, lics, supporting = enrichSafeTensorsOCI(ctx, resolver, &md)
|
||||
} else {
|
||||
nameOrPath, lics, supporting = enrichSafeTensorsDir(ctx, resolver, groupKey, &md)
|
||||
}
|
||||
@ -353,7 +359,7 @@ func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKe
|
||||
for _, loc := range supporting {
|
||||
merged.Locations.Add(loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
|
||||
}
|
||||
return nameOrPath
|
||||
return nameOrPath, imageRefName
|
||||
}
|
||||
|
||||
// enrichSafeTensorsDir handles the directory-scan case: look for sibling
|
||||
@ -380,11 +386,16 @@ func enrichSafeTensorsDir(ctx context.Context, resolver file.Resolver, dir strin
|
||||
// enrichSafeTensorsOCI handles the OCI-artifact case: walk the
|
||||
// vnd.docker.ai.model.file layers (READMEs and HF config.json all ride that
|
||||
// media type — we sniff content to tell them apart), then fall back to the
|
||||
// vnd.docker.ai.license layer through the shared license scanner.
|
||||
func enrichSafeTensorsOCI(ctx context.Context, resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (nameOrPath string, lics []pkg.License, supporting []file.Location) {
|
||||
// vnd.docker.ai.license layer through the shared license scanner. It also
|
||||
// pulls the user-supplied image reference off the resolver (when the resolver
|
||||
// implements file.OCIArtifactResolver) and returns its last path segment as a
|
||||
// naming candidate — repacked artifacts like Docker AI vllm images frequently
|
||||
// strip name fields out of every embedded config, so the image ref is often
|
||||
// the only remaining identifier for the model.
|
||||
func enrichSafeTensorsOCI(ctx context.Context, resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (nameOrPath, imageRefName string, lics []pkg.License, supporting []file.Location) {
|
||||
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
||||
if !ok {
|
||||
return "", nil, nil
|
||||
return "", "", nil, nil
|
||||
}
|
||||
|
||||
modelFileLocs, err := ociResolver.FilesByMediaType(dockerAIModelFileMediaType)
|
||||
@ -424,7 +435,30 @@ func enrichSafeTensorsOCI(ctx context.Context, resolver file.Resolver, md *pkg.S
|
||||
supporting = append(supporting, licLocs...)
|
||||
}
|
||||
}
|
||||
return nameOrPath, lics, supporting
|
||||
|
||||
imageRefName = ociImageRefBasename(resolver)
|
||||
return nameOrPath, imageRefName, lics, supporting
|
||||
}
|
||||
|
||||
// ociImageRefBasename returns the last path segment of the repository portion
|
||||
// of the OCI image reference exposed by the resolver, or "" when the resolver
|
||||
// does not implement OCIArtifactResolver or the reference fails to parse. For
|
||||
// "docker.io/ai/smollm2-vllm:360M" this returns "smollm2-vllm".
|
||||
func ociImageRefBasename(resolver file.Resolver) string {
|
||||
info, ok := resolver.(file.OCIArtifactResolver)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
ref := info.ImageReference()
|
||||
if ref == "" {
|
||||
return ""
|
||||
}
|
||||
parsed, err := gcrname.ParseReference(ref)
|
||||
if err != nil {
|
||||
log.Debugf("failed to parse OCI ref %q: %v", ref, err)
|
||||
return ""
|
||||
}
|
||||
return path.Base(parsed.Context().RepositoryStr())
|
||||
}
|
||||
|
||||
// identifyLicenseLayers turns Docker AI license-layer locations into
|
||||
@ -530,32 +564,21 @@ func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) {
|
||||
|
||||
// pickSafeTensorsName implements the documented naming precedence chain:
//
//  1. config.json _name_or_path (path.Base, so "org/Model" → "Model";
//     applies to both dir-scan and OCI groups)
//  2. OCI image-ref last segment (OCI-only; the user-supplied artifact
//     reference's repository basename, e.g.
//     "docker.io/ai/smollm2-vllm:360M" → "smollm2-vllm")
//
// A _name_or_path whose base is not a usable label is treated as absent and
// falls through to rung 2: path.Base yields "." for "", "." and "./", ".." for
// "..", and "/" for "/", none of which identify a model.
//
// Returns "" to signal the merge processor should drop the group. There is
// intentionally no Architecture-Parameters synthetic or parent-directory
// fallback: an unnameable model is recorded as absent rather than under a
// label the SBOM consumer would not recognize.
func pickSafeTensorsName(nameOrPath, imageRefName string) string {
	// path.Base never returns ""; an empty nameOrPath comes back as ".", so
	// the switch below also covers the "rung 1 not available" case.
	switch base := path.Base(nameOrPath); base {
	case ".", "..", "/":
		// Not a name — use the image-ref candidate (which may itself be "",
		// in which case the caller drops the group).
		return imageRefName
	default:
		return base
	}
}
|
||||
|
||||
// --- Relocated enrichment helpers ----------------------------------------
|
||||
@ -632,11 +655,11 @@ func extractFrontmatterBlock(buf []byte) []byte {
|
||||
if i := bytes.IndexByte(rest, '\n'); i >= 0 {
|
||||
rest = rest[i+1:]
|
||||
}
|
||||
end := bytes.Index(rest, []byte("\n---"))
|
||||
if end < 0 {
|
||||
block, _, found := bytes.Cut(rest, []byte("\n---"))
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
return rest[:end]
|
||||
return block
|
||||
}
|
||||
|
||||
// parseFrontmatter decodes a Hugging Face model card YAML frontmatter block
|
||||
|
||||
@ -52,11 +52,6 @@ func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
|
||||
b.tensorCount = count
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
|
||||
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
|
||||
return b
|
||||
@ -119,10 +114,3 @@ func (b *testGGUFBuilder) build() []byte {
|
||||
|
||||
return b.buf.Bytes()
|
||||
}
|
||||
|
||||
// buildInvalidMagic creates a file with invalid magic number
|
||||
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
@ -39,6 +39,7 @@ type ociModelSource struct {
|
||||
resolver interface {
|
||||
file.Resolver
|
||||
file.OCIMediaTypeResolver
|
||||
file.OCIArtifactResolver
|
||||
}
|
||||
mutex *sync.Mutex
|
||||
}
|
||||
@ -158,7 +159,11 @@ func fetchAndStoreModelHeaders(ctx context.Context, client *registryClient, arti
|
||||
}
|
||||
}
|
||||
|
||||
resolver := fileresolver.NewContainerImageModel(tempDir, layerFiles)
|
||||
resolver := fileresolver.NewContainerImageModel(
|
||||
tempDir,
|
||||
layerFiles,
|
||||
artifact.Reference.String(),
|
||||
)
|
||||
|
||||
return tempDir, resolver, nil
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user