mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
tests: clean up and add complete dir tests
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
b85b50001d
commit
dd179eb8a7
@ -76,9 +76,9 @@ func NewMockResolverForPathsWithMetadata(metadata map[Coordinates]Metadata) *Moc
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewMockResolverForOCIArtifact creates a MockResolver that can resolve files
|
// NewMockResolverForOCIArtifact creates a MockResolver that can resolve files
|
||||||
// by media type AND surfaces the given OCI ref via the OCIArtifactResolver
|
// by media type AND surfaces the given OCI ref via the ImageReference method.
|
||||||
// interface. Intended for tests that exercise the catalogers' OCI-artifact-aware
|
// Intended for tests that exercise the catalogers' OCI-artifact-aware naming
|
||||||
// naming code paths.
|
// code paths.
|
||||||
func NewMockResolverForOCIArtifact(ref string, mediaTypes map[string][]Location) *MockResolver {
|
func NewMockResolverForOCIArtifact(ref string, mediaTypes map[string][]Location) *MockResolver {
|
||||||
r := NewMockResolverForMediaTypes(mediaTypes)
|
r := NewMockResolverForMediaTypes(mediaTypes)
|
||||||
r.ociRef = ref
|
r.ociRef = ref
|
||||||
|
|||||||
@ -30,9 +30,9 @@ type ContainerImageModel struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewContainerImageModel creates a new resolver with the given temp directory
|
// NewContainerImageModel creates a new resolver with the given temp directory
|
||||||
// and layer files. The ref is surfaced through the file.OCIArtifactResolver
|
// and layer files. The ref is surfaced through the ImageReference method so
|
||||||
// interface so catalogers can derive context-level naming hints from the
|
// catalogers can derive context-level naming hints from the artifact reference
|
||||||
// artifact reference when the layer contents don't carry one.
|
// when the layer contents don't carry one.
|
||||||
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo, ref string) *ContainerImageModel {
|
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo, ref string) *ContainerImageModel {
|
||||||
// Create locations for all layer files
|
// Create locations for all layer files
|
||||||
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"
|
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"
|
||||||
|
|||||||
@ -40,6 +40,20 @@ func buildSafeTensorsFile(t *testing.T, metadata map[string]string, tensors map[
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestSafeTensorsCataloger is the end-to-end `dir:` scan naming matrix: it walks
|
||||||
|
// real on-disk directory trees and locks how a model package gets its name at
|
||||||
|
// every depth a .safetensors file can appear (the scan root `./`, an immediate
|
||||||
|
// child `./sometensor/`, and a grandchild `./dir/someothertensor/`).
|
||||||
|
//
|
||||||
|
// The naming precedence (owned by the merge processor's pickSafeTensorsName) is:
|
||||||
|
// 1. config.json _name_or_path (path.Base applied), found beside the model or
|
||||||
|
// by walking up parent directories to the scan root
|
||||||
|
// 2. otherwise the model's immediate parent directory base name
|
||||||
|
// → drop (no package) when neither yields a usable name
|
||||||
|
//
|
||||||
|
// Every model below is built from the same header bytes, so the header-derived
|
||||||
|
// metadata (Quantization/Parameters/TensorCount/UserMetadata/MetadataHash) is
|
||||||
|
// identical across rows and each row stays focused on naming.
|
||||||
func TestSafeTensorsCataloger(t *testing.T) {
|
func TestSafeTensorsCataloger(t *testing.T) {
|
||||||
userMeta := map[string]string{"format": "pt"}
|
userMeta := map[string]string{"format": "pt"}
|
||||||
tensors := map[string]safeTensorsEntry{
|
tensors := map[string]safeTensorsEntry{
|
||||||
@ -50,6 +64,33 @@ func TestSafeTensorsCataloger(t *testing.T) {
|
|||||||
// cataloger wires the header hash through to the package metadata.
|
// cataloger wires the header hash through to the package metadata.
|
||||||
wantHash := (&safeTensorsHeader{metadata: userMeta, tensors: tensors}).metadataHash()
|
wantHash := (&safeTensorsHeader{metadata: userMeta, tensors: tensors}).metadataHash()
|
||||||
|
|
||||||
|
// model writes the shared .safetensors header into dir/<name>.safetensors,
|
||||||
|
// creating dir if needed.
|
||||||
|
model := func(t *testing.T, dir string) {
|
||||||
|
t.Helper()
|
||||||
|
require.NoError(t, os.MkdirAll(dir, 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(filepath.Join(dir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
|
||||||
|
}
|
||||||
|
writeFile := func(t *testing.T, path, contents string) {
|
||||||
|
t.Helper()
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(contents), 0o644))
|
||||||
|
}
|
||||||
|
// wantMetadata is the constant header-derived metadata; architecture is the
|
||||||
|
// only field that varies (it's enriched from config.json when present).
|
||||||
|
wantMetadata := func(architecture string) pkg.SafeTensorsModelInfo {
|
||||||
|
return pkg.SafeTensorsModelInfo{
|
||||||
|
Format: "safetensors",
|
||||||
|
Architecture: architecture,
|
||||||
|
Quantization: "BF16",
|
||||||
|
Parameters: "16.26K",
|
||||||
|
TensorCount: 2,
|
||||||
|
ShardCount: 1,
|
||||||
|
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
||||||
|
MetadataHash: wantHash,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
setup func(t *testing.T) string
|
setup func(t *testing.T) string
|
||||||
@ -57,16 +98,17 @@ func TestSafeTensorsCataloger(t *testing.T) {
|
|||||||
expectedRelationships []artifact.Relationship
|
expectedRelationships []artifact.Relationship
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "single-file model directory with config.json and README",
|
// rung 1: config.json _name_or_path (path.Base of "org/Llama-3-8B")
|
||||||
|
// wins over the "sometensor" directory fallback; license from README.
|
||||||
|
name: "config.json _name_or_path names the model and wins over the directory",
|
||||||
setup: func(t *testing.T) string {
|
setup: func(t *testing.T) string {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
modelDir := filepath.Join(dir, "models")
|
modelDir := filepath.Join(dir, "sometensor")
|
||||||
require.NoError(t, os.MkdirAll(modelDir, 0o755))
|
model(t, modelDir)
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "model.safetensors"), buildSafeTensorsFile(t, userMeta, tensors), 0o644))
|
writeFile(t, filepath.Join(modelDir, "config.json"),
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "config.json"),
|
`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`)
|
||||||
[]byte(`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16","transformers_version":"4.40.0","_name_or_path":"meta-llama/Llama-3-8B"}`), 0o644))
|
writeFile(t, filepath.Join(modelDir, "README.md"),
|
||||||
require.NoError(t, os.WriteFile(filepath.Join(modelDir, "README.md"),
|
"---\nlicense: Apache-2.0\nbase_model:\n - meta-llama/Llama-3\n---\n# Llama 3\n")
|
||||||
[]byte("---\nlicense: Apache-2.0\nbase_model:\n - meta-llama/Llama-3\n---\n# Llama 3\n"), 0o644))
|
|
||||||
return dir
|
return dir
|
||||||
},
|
},
|
||||||
expectedPackages: []pkg.Package{
|
expectedPackages: []pkg.Package{
|
||||||
@ -76,16 +118,134 @@ func TestSafeTensorsCataloger(t *testing.T) {
|
|||||||
Licenses: pkg.NewLicenseSet(
|
Licenses: pkg.NewLicenseSet(
|
||||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||||
),
|
),
|
||||||
Metadata: pkg.SafeTensorsModelInfo{
|
Metadata: wantMetadata("LlamaForCausalLM"),
|
||||||
Format: "safetensors",
|
},
|
||||||
Architecture: "LlamaForCausalLM",
|
},
|
||||||
Quantization: "BF16",
|
},
|
||||||
Parameters: "16.26K",
|
{
|
||||||
TensorCount: 2,
|
// rung 2: no config.json at all, so the model is named after its
|
||||||
ShardCount: 1,
|
// immediate parent directory.
|
||||||
UserMetadata: pkg.KeyValues{{Key: "format", Value: "pt"}},
|
name: "no config.json falls back to the parent directory name",
|
||||||
MetadataHash: wantHash,
|
setup: func(t *testing.T) string {
|
||||||
},
|
dir := t.TempDir()
|
||||||
|
model(t, filepath.Join(dir, "sometensor"))
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "sometensor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// rung 2: config.json exists but carries no _name_or_path (only
|
||||||
|
// non-identifying info), so we still fall back to the directory name
|
||||||
|
// while enriching architecture from the config.
|
||||||
|
name: "config.json without _name_or_path falls back to the parent directory name",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
modelDir := filepath.Join(dir, "sometensor")
|
||||||
|
model(t, modelDir)
|
||||||
|
writeFile(t, filepath.Join(modelDir, "config.json"),
|
||||||
|
`{"architectures":["LlamaForCausalLM"],"torch_dtype":"bfloat16"}`)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "sometensor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata("LlamaForCausalLM"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// rung 2: the fallback is the IMMEDIATE parent ("someothertensor"),
|
||||||
|
// not an ancestor ("dir").
|
||||||
|
name: "nested model with no config.json is named by its immediate parent directory",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
model(t, filepath.Join(dir, "dir", "someothertensor"))
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "someothertensor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// rung 1 at the scan root: a model directly at `./` is unnameable by
|
||||||
|
// the directory fallback (its parent is the degenerate root "."), but
|
||||||
|
// a sibling config.json still names it.
|
||||||
|
name: "root-level model is named from a root config.json",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
model(t, dir)
|
||||||
|
writeFile(t, filepath.Join(dir, "config.json"), `{"_name_or_path":"org/RootModel"}`)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "RootModel",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// drop: a model directly at `./` with no config.json has no usable
|
||||||
|
// name — the parent is the degenerate root ".", which yields no
|
||||||
|
// directory fallback — so no package is emitted.
|
||||||
|
name: "root-level model with no config.json is dropped",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
model(t, dir)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// rung 1 via parent-walk: findDirHFConfig walks up from the model
|
||||||
|
// directory, so a config.json in an ancestor names a nested model.
|
||||||
|
name: "config.json in an ancestor directory names a nested model",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
model(t, filepath.Join(dir, "dir", "someothertensor"))
|
||||||
|
writeFile(t, filepath.Join(dir, "dir", "config.json"), `{"_name_or_path":"org/Ancestor"}`)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "Ancestor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// grouping: independent models in one scan are grouped by their own
|
||||||
|
// parent directory and each named from it.
|
||||||
|
name: "sibling models in one scan are each named by their own directory",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
model(t, filepath.Join(dir, "sometensor"))
|
||||||
|
model(t, filepath.Join(dir, "dir", "someothertensor"))
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "sometensor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "someothertensor",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: wantMetadata(""),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@ -18,7 +18,6 @@ import (
|
|||||||
"github.com/anchore/syft/internal/log"
|
"github.com/anchore/syft/internal/log"
|
||||||
"github.com/anchore/syft/syft/artifact"
|
"github.com/anchore/syft/syft/artifact"
|
||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/internal/fileresolver"
|
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
|
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
|
||||||
)
|
)
|
||||||
@ -417,10 +416,18 @@ func resolveSafeTensorsOCIIdentity(resolver file.Resolver, md *pkg.SafeTensorsMo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ociImageReferencer is the minimal capability ociImageRefBasename needs: a
|
||||||
|
// resolver that can surface the OCI image reference it was built from. It is
|
||||||
|
// kept local to this package (rather than exported from the file package) so the
|
||||||
|
// assertion stays with its only consumer.
|
||||||
|
type ociImageReferencer interface {
|
||||||
|
ImageReference() string
|
||||||
|
}
|
||||||
|
|
||||||
func ociImageRefBasename(resolver file.Resolver) string {
|
func ociImageRefBasename(resolver file.Resolver) string {
|
||||||
// TODO: we don't think this approach is generalizable quite yet, but we really do need this information.
|
// TODO: we don't think this approach is generalizable quite yet, but we really do need this information.
|
||||||
// (Ideally we should be NOT be type asserting on the file resolver directly).
|
// (Ideally we should be NOT be type asserting on the file resolver directly).
|
||||||
info, ok := resolver.(*fileresolver.ContainerImageModel)
|
info, ok := resolver.(ociImageReferencer)
|
||||||
if !ok {
|
if !ok {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user