tests: clean up and add complete dir tests

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2026-06-05 02:10:25 -04:00
parent b85b50001d
commit dd179eb8a7
No known key found for this signature in database
4 changed files with 193 additions and 26 deletions

View File

@ -76,9 +76,9 @@ func NewMockResolverForPathsWithMetadata(metadata map[Coordinates]Metadata) *Moc
}
// NewMockResolverForOCIArtifact creates a MockResolver that can resolve files
// by media type AND surfaces the given OCI ref via the OCIArtifactResolver
// interface. Intended for tests that exercise the catalogers' OCI-artifact-aware
// naming code paths.
// by media type AND surfaces the given OCI ref via the ImageReference method.
// Intended for tests that exercise the catalogers' OCI-artifact-aware naming
// code paths.
func NewMockResolverForOCIArtifact(ref string, mediaTypes map[string][]Location) *MockResolver {
r := NewMockResolverForMediaTypes(mediaTypes)
r.ociRef = ref

View File

@ -30,9 +30,9 @@ type ContainerImageModel struct {
}
// NewContainerImageModel creates a new resolver with the given temp directory
// and layer files. The ref is surfaced through the file.OCIArtifactResolver
// interface so catalogers can derive context-level naming hints from the
// artifact reference when the layer contents don't carry one.
// and layer files. The ref is surfaced through the ImageReference method so
// catalogers can derive context-level naming hints from the artifact reference
// when the layer contents don't carry one.
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo, ref string) *ContainerImageModel {
// Create locations for all layer files
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"

View File

@ -40,6 +40,20 @@ func buildSafeTensorsFile(t *testing.T, metadata map[string]string, tensors map[
return out
}
// TestSafeTensorsCataloger is the end-to-end `dir:` scan naming matrix: it walks
// real on-disk directory trees and locks how a model package gets its name at
// every depth a .safetensors file can appear (the scan root `./`, an immediate
// child `./sometensor/`, and a grandchild `./dir/someothertensor/`).
//
// The naming precedence (owned by the merge processor's pickSafeTensorsName) is:
// 1. config.json _name_or_path (path.Base applied), found beside the model or
// by walking up parent directories to the scan root
// 2. otherwise the model's immediate parent directory base name
// → drop (no package) when neither yields a usable name
//
// Every model below is built from the same header bytes, so the header-derived
// metadata (Quantization/Parameters/TensorCount/UserMetadata/MetadataHash) is
// identical across rows and each row stays focused on naming.
func TestSafeTensorsCataloger(t *testing.T) {
userMeta := map[string]string{"format": "pt"}
tensors := map[string]safeTensorsEntry{
@ -50,6 +64,33 @@ func TestSafeTensorsCataloger(t *testing.T) {
// cataloger wires the header hash through to the package metadata.
wantHash := (&safeTensorsHeader{metadata: userMeta, tensors: tensors}).metadataHash()
// model writes the shared .safetensors header into dir/model.safetensors,
// creating dir if needed.
model := func(t *testing.T, dir string) {
t.Helper()
require.NoError(t, os.MkdirAll(dir, 0o755))
require.NoError(t, os.WriteFile(filepath.Join(dir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
}
writeFile := func(t *testing.T, path, contents string) {
t.Helper()
require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
require.NoError(t, os.WriteFile(path, []byte(contents), 0o644))
}
// wantMetadata is the constant header-derived metadata; architecture is the
// only field that varies (it's enriched from config.json when present).
wantMetadata := func(architecture string) pkg.SafeTensorsModelInfo {
return pkg.SafeTensorsModelInfo{
Format: "safetensors",
Architecture: architecture,
Quantization: "BF16",
Parameters: "16.26K",
TensorCount: 2,
ShardCount: 1,
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
MetadataHash: wantHash,
}
}
tests := []struct {
name string
setup func(t *testing.T) string
@ -57,16 +98,17 @@ func TestSafeTensorsCataloger(t *testing.T) {
expectedRelationships []artifact.Relationship
}{
{
name: "single-file model directory with config.json and README",
// rung 1: config.json _name_or_path (path.Base of "meta-llama/Llama-3-8B")
// wins over the "sometensor" directory fallback; license from README.
name: "config.json _name_or_path names the model and wins over the directory",
setup: func(t *testing.T) string {
dir := t.TempDir()
modelDir := filepath.Join(dir, "models")
require.NoError(t, os.MkdirAll(modelDir, 0o755))
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "config.json"),
[]byte(`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`), 0o644))
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "README.md"),
[]byte("---\nlicense: Apache-2.0\nbase_model:\n - meta-llama/Llama-3\n---\n# Llama 3\n"), 0o644))
modelDir := filepath.Join(dir, "sometensor")
model(t, modelDir)
writeFile(t, filepath.Join(modelDir, "config.json"),
`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`)
writeFile(t, filepath.Join(modelDir, "README.md"),
"---\nlicense: Apache-2.0\nbase_model:\n - meta-llama/Llama-3\n---\n# Llama 3\n")
return dir
},
expectedPackages: []pkg.Package{
@ -76,16 +118,134 @@ func TestSafeTensorsCataloger(t *testing.T) {
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.SafeTensorsModelInfo{
Format: "safetensors",
Architecture: "LlamaForCausalLM",
Quantization: "BF16",
Parameters: "16.26K",
TensorCount: 2,
ShardCount: 1,
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
MetadataHash: wantHash,
},
Metadata: wantMetadata("LlamaForCausalLM"),
},
},
},
{
// rung 2: no config.json at all, so the model is named after its
// immediate parent directory.
name: "no config.json falls back to the parent directory name",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, filepath.Join(dir, "sometensor"))
return dir
},
expectedPackages: []pkg.Package{
{
Name: "sometensor",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
},
},
{
// rung 2: config.json exists but carries no _name_or_path (only
// non-identifying info), so we still fall back to the directory name
// while enriching architecture from the config.
name: "config.json without _name_or_path falls back to the parent directory name",
setup: func(t *testing.T) string {
dir := t.TempDir()
modelDir := filepath.Join(dir, "sometensor")
model(t, modelDir)
writeFile(t, filepath.Join(modelDir, "config.json"),
`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16"}`)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "sometensor",
Type: pkg.ModelPkg,
Metadata: wantMetadata("LlamaForCausalLM"),
},
},
},
{
// rung 2: the fallback is the IMMEDIATE parent ("someothertensor"),
// not an ancestor ("dir").
name: "nested model with no config.json is named by its immediate parent directory",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, filepath.Join(dir, "dir", "someothertensor"))
return dir
},
expectedPackages: []pkg.Package{
{
Name: "someothertensor",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
},
},
{
// rung 1 at the scan root: a model directly at `./` is unnameable by
// the directory fallback (its parent is the degenerate root "."), but
// a sibling config.json still names it.
name: "root-level model is named from a root config.json",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, dir)
writeFile(t, filepath.Join(dir, "config.json"), `{"_name_or_path":"org/RootModel"}`)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "RootModel",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
},
},
{
// drop: a model directly at `./` with no config.json has no usable
// name — the parent is the degenerate root ".", which yields no
// directory fallback — so no package is emitted.
name: "root-level model with no config.json is dropped",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, dir)
return dir
},
expectedPackages: nil,
},
{
// rung 1 via parent-walk: findDirHFConfig walks up from the model
// directory, so a config.json in an ancestor names a nested model.
name: "config.json in an ancestor directory names a nested model",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, filepath.Join(dir, "dir", "someothertensor"))
writeFile(t, filepath.Join(dir, "dir", "config.json"), `{"_name_or_path":"org/Ancestor"}`)
return dir
},
expectedPackages: []pkg.Package{
{
Name: "Ancestor",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
},
},
{
// grouping: independent models in one scan are grouped by their own
// parent directory and each named from it.
name: "sibling models in one scan are each named by their own directory",
setup: func(t *testing.T) string {
dir := t.TempDir()
model(t, filepath.Join(dir, "sometensor"))
model(t, filepath.Join(dir, "dir", "someothertensor"))
return dir
},
expectedPackages: []pkg.Package{
{
Name: "sometensor",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
{
Name: "someothertensor",
Type: pkg.ModelPkg,
Metadata: wantMetadata(""),
},
},
},

View File

@ -18,7 +18,6 @@ import (
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/fileresolver"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
@ -417,10 +416,18 @@ func resolveSafeTensorsOCIIdentity(resolver file.Resolver, md *pkg.SafeTensorsMo
}
}
// ociImageReferencer is the minimal capability ociImageRefBasename needs: a
// resolver that can surface the OCI image reference it was built from. It is
// kept local to this package (rather than exported from the file package) so the
// assertion stays with its only consumer.
type ociImageReferencer interface {
// ImageReference returns the OCI image reference the resolver was built from.
ImageReference() string
}
func ociImageRefBasename(resolver file.Resolver) string {
// TODO: we don't think this approach is generalizable quite yet, but we really do need this information.
// (Ideally we should NOT be type asserting on the file resolver directly).
info, ok := resolver.(*fileresolver.ContainerImageModel)
info, ok := resolver.(ociImageReferencer)
if !ok {
return ""
}