mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
test: migrate gguf tests over
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
746f00ad68
commit
165611d2e4
385
syft/pkg/cataloger/aiartifact/cataloger_test.go
Normal file
385
syft/pkg/cataloger/aiartifact/cataloger_test.go
Normal file
@ -0,0 +1,385 @@
|
||||
package aiartifact
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||
)
|
||||
|
||||
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T) string // returns fixture directory
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "finds GGUF files in root",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
createTestGGUFInDir(t, dir, "model1.gguf")
|
||||
createTestGGUFInDir(t, dir, "model2.gguf")
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"model1.gguf",
|
||||
"model2.gguf",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "finds GGUF files in subdirectories",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
modelsDir := filepath.Join(dir, "models")
|
||||
os.MkdirAll(modelsDir, 0755)
|
||||
createTestGGUFInDir(t, modelsDir, "llama.gguf")
|
||||
|
||||
deepDir := filepath.Join(dir, "deep", "nested", "path")
|
||||
os.MkdirAll(deepDir, 0755)
|
||||
createTestGGUFInDir(t, deepDir, "mistral.gguf")
|
||||
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"models/llama.gguf",
|
||||
"deep/nested/path/mistral.gguf",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ignores non-GGUF files",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
createTestGGUFInDir(t, dir, "model.gguf")
|
||||
|
||||
// Create non-GGUF files
|
||||
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"model.gguf",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fixtureDir := tt.setup(t)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, fixtureDir)
|
||||
|
||||
if len(tt.expected) > 0 {
|
||||
tester.ExpectsResolverContentQueries(tt.expected)
|
||||
}
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T) string
|
||||
expectedPackages []pkg.Package
|
||||
expectedRelationships []artifact.Relationship
|
||||
}{
|
||||
{
|
||||
name: "catalog single GGUF file",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(291).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "llama3-8b").
|
||||
withStringKV("general.version", "3.0").
|
||||
withStringKV("general.license", "Apache-2.0").
|
||||
withStringKV("general.quantization", "Q4_K_M").
|
||||
withUint64KV("general.parameter_count", 8030000000).
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||
os.WriteFile(path, data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "llama3-8b",
|
||||
Version: "3.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "llama3-8b",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog multiple GGUF files",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create first model
|
||||
data1 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(100).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "model1").
|
||||
withStringKV("general.version", "1.0").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
|
||||
|
||||
// Create second model
|
||||
data2 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(200).
|
||||
withStringKV("general.architecture", "mistral").
|
||||
withStringKV("general.name", "model2").
|
||||
withStringKV("general.version", "2.0").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
|
||||
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "model1",
|
||||
Version: "1.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model1",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "llama",
|
||||
Quantization: unkownGGUFData,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 100,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "model2",
|
||||
Version: "2.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model2",
|
||||
ModelVersion: "2.0",
|
||||
Architecture: "mistral",
|
||||
Quantization: unkownGGUFData,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 200,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog GGUF in nested directories",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
nestedDir := filepath.Join(dir, "models", "quantized")
|
||||
os.MkdirAll(nestedDir, 0755)
|
||||
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(150).
|
||||
withStringKV("general.architecture", "qwen").
|
||||
withStringKV("general.name", "qwen-nested").
|
||||
build()
|
||||
|
||||
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "qwen-nested",
|
||||
Version: unkownGGUFData,
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "qwen-nested",
|
||||
ModelVersion: unkownGGUFData,
|
||||
Architecture: "qwen",
|
||||
Quantization: unkownGGUFData,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fixtureDir := tt.setup(t)
|
||||
|
||||
// Use pkgtest to catalog and compare
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, fixtureDir).
|
||||
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||
IgnoreLocationLayer().
|
||||
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||
WithCompareOptions(
|
||||
// Ignore Hash as it's computed dynamically
|
||||
cmpopts.IgnoreFields(pkg.GGUFFileMetadata{}, "Hash"),
|
||||
)
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create a valid GGUF
|
||||
validData := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(100).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "valid-model").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
|
||||
|
||||
// Create an invalid GGUF (wrong magic)
|
||||
invalidData := newTestGGUFBuilder().buildInvalidMagic()
|
||||
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
|
||||
|
||||
// Create a truncated GGUF
|
||||
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
|
||||
|
||||
// Catalog should succeed and only return the valid package
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Should only find the valid model
|
||||
require.Len(t, pkgs, 1)
|
||||
assert.Equal(t, "valid-model", pkgs[0].Name)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Name(t *testing.T) {
|
||||
cataloger := NewGGUFCataloger()
|
||||
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// Create a subdirectory to ensure glob still runs
|
||||
os.MkdirAll(filepath.Join(dir, "models"), 0755)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
|
||||
assert.Empty(t, pkgs)
|
||||
assert.Empty(t, rels)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create GGUF file
|
||||
ggufData := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(100).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
|
||||
|
||||
// Create other file types
|
||||
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
|
||||
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Should only find the GGUF model
|
||||
require.Len(t, pkgs, 1)
|
||||
assert.Equal(t, "test-model", pkgs[0].Name)
|
||||
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
||||
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create lowercase .gguf
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(100).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "lowercase").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
|
||||
|
||||
// Create uppercase .GGUF (should not match the glob)
|
||||
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
|
||||
// On case-insensitive filesystems (macOS), both might match
|
||||
// On case-sensitive filesystems (Linux), only lowercase matches
|
||||
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
|
||||
func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
||||
t.Helper()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withTensorCount(100).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, filename)
|
||||
err := os.WriteFile(path, data, 0644)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
@ -1,9 +1,6 @@
|
||||
package aiartifact
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/anchore/packageurl-go"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
@ -12,11 +9,12 @@ func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location)
|
||||
p := pkg.Package{
|
||||
Name: metadata.ModelName,
|
||||
Version: metadata.ModelVersion,
|
||||
PURL: packageURL(metadata),
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Metadata: *metadata,
|
||||
// NOTE: PURL is intentionally not set as the package-url spec
|
||||
// has not yet finalized support for ML model packages
|
||||
}
|
||||
|
||||
// Add license to the package if present in metadata
|
||||
@ -28,41 +26,3 @@ func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location)
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
// packageURL returns the PURL for the specific GGUF model package (see https://github.com/package-url/purl-spec)
|
||||
func packageURL(metadata *pkg.GGUFFileMetadata) string {
|
||||
var qualifiers packageurl.Qualifiers
|
||||
|
||||
// Add model-specific qualifiers
|
||||
if metadata.Architecture != "" {
|
||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
||||
Key: "arch",
|
||||
Value: metadata.Architecture,
|
||||
})
|
||||
}
|
||||
|
||||
if metadata.Quantization != "" && metadata.Quantization != "unknown" {
|
||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
||||
Key: "quantization",
|
||||
Value: metadata.Quantization,
|
||||
})
|
||||
}
|
||||
|
||||
if metadata.Parameters > 0 {
|
||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
||||
Key: "parameters",
|
||||
Value: fmt.Sprintf("%d", metadata.Parameters),
|
||||
})
|
||||
}
|
||||
|
||||
// Use mlmodel as the type for machine learning models in GGUF format
|
||||
// This follows the PURL spec guidance for ML models
|
||||
return packageurl.NewPackageURL(
|
||||
"mlmodel",
|
||||
"gguf",
|
||||
metadata.ModelName,
|
||||
metadata.ModelVersion,
|
||||
qualifiers,
|
||||
"",
|
||||
).ToString()
|
||||
}
|
||||
|
||||
185
syft/pkg/cataloger/aiartifact/package_test.go
Normal file
185
syft/pkg/cataloger/aiartifact/package_test.go
Normal file
@ -0,0 +1,185 @@
|
||||
package aiartifact
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func TestNewGGUFPackage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
metadata *pkg.GGUFFileMetadata
|
||||
locations []file.Location
|
||||
checkFunc func(t *testing.T, p pkg.Package)
|
||||
}{
|
||||
{
|
||||
name: "complete GGUF package with all fields",
|
||||
metadata: &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "llama3-8b-instruct",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
assert.Equal(t, "llama3-8b-instruct", p.Name)
|
||||
assert.Equal(t, "3.0", p.Version)
|
||||
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||
assert.Len(t, p.Licenses.ToSlice(), 1)
|
||||
assert.Equal(t, "Apache-2.0", p.Licenses.ToSlice()[0].Value)
|
||||
assert.NotEmpty(t, p.ID())
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "minimal GGUF package",
|
||||
metadata: &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "simple-model",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "gpt2",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 50,
|
||||
},
|
||||
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
assert.Equal(t, "simple-model", p.Name)
|
||||
assert.Equal(t, "1.0", p.Version)
|
||||
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||
assert.Empty(t, p.Licenses.ToSlice())
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "GGUF package with multiple locations",
|
||||
metadata: &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "multi-location-model",
|
||||
ModelVersion: "1.5",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
},
|
||||
locations: []file.Location{
|
||||
file.NewLocation("/models/model1.gguf"),
|
||||
file.NewLocation("/models/model2.gguf"),
|
||||
},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
assert.Len(t, p.Locations.ToSlice(), 2)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||
|
||||
assert.Equal(t, tt.metadata.ModelName, p.Name)
|
||||
assert.Equal(t, tt.metadata.ModelVersion, p.Version)
|
||||
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||
|
||||
// Verify metadata is attached
|
||||
metadata, ok := p.Metadata.(pkg.GGUFFileMetadata)
|
||||
require.True(t, ok, "metadata should be GGUFFileMetadata")
|
||||
assert.Equal(t, *tt.metadata, metadata)
|
||||
|
||||
if tt.checkFunc != nil {
|
||||
tt.checkFunc(t, p)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewGGUFPackage_IDUniqueness(t *testing.T) {
|
||||
// Test that different packages get different IDs
|
||||
metadata1 := &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model-1",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 100,
|
||||
}
|
||||
|
||||
metadata2 := &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model-2",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 100,
|
||||
}
|
||||
|
||||
loc := file.NewLocation("/models/test.gguf")
|
||||
p1 := newGGUFPackage(metadata1, loc)
|
||||
p2 := newGGUFPackage(metadata2, loc)
|
||||
|
||||
assert.NotEqual(t, p1.ID(), p2.ID(), "different packages should have different IDs")
|
||||
}
|
||||
|
||||
func TestNewGGUFPackage_IDConsistency(t *testing.T) {
|
||||
// Test that same metadata produces same ID
|
||||
metadata := &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "test-model",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 100,
|
||||
}
|
||||
|
||||
loc := file.NewLocation("/models/test.gguf")
|
||||
p1 := newGGUFPackage(metadata, loc)
|
||||
p2 := newGGUFPackage(metadata, loc)
|
||||
|
||||
assert.Equal(t, p1.ID(), p2.ID(), "identical packages should have identical IDs")
|
||||
}
|
||||
|
||||
func TestNewGGUFPackage_MetadataPreservation(t *testing.T) {
|
||||
// Ensure all metadata fields are preserved in the package
|
||||
metadata := &pkg.GGUFFileMetadata{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "preservation-test",
|
||||
ModelVersion: "2.0",
|
||||
License: "MIT",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 7000000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 219,
|
||||
Hash: "abc123",
|
||||
Header: map[string]interface{}{"custom.field": "value"},
|
||||
TruncatedHeader: false,
|
||||
}
|
||||
|
||||
loc := file.NewLocation("/models/test.gguf")
|
||||
p := newGGUFPackage(metadata, loc)
|
||||
|
||||
extractedMetadata, ok := p.Metadata.(pkg.GGUFFileMetadata)
|
||||
require.True(t, ok)
|
||||
|
||||
assert.Equal(t, metadata.ModelFormat, extractedMetadata.ModelFormat)
|
||||
assert.Equal(t, metadata.ModelName, extractedMetadata.ModelName)
|
||||
assert.Equal(t, metadata.ModelVersion, extractedMetadata.ModelVersion)
|
||||
assert.Equal(t, metadata.License, extractedMetadata.License)
|
||||
assert.Equal(t, metadata.Architecture, extractedMetadata.Architecture)
|
||||
assert.Equal(t, metadata.Quantization, extractedMetadata.Quantization)
|
||||
assert.Equal(t, metadata.Parameters, extractedMetadata.Parameters)
|
||||
assert.Equal(t, metadata.GGUFVersion, extractedMetadata.GGUFVersion)
|
||||
assert.Equal(t, metadata.TensorCount, extractedMetadata.TensorCount)
|
||||
assert.Equal(t, metadata.Hash, extractedMetadata.Hash)
|
||||
assert.Equal(t, metadata.TruncatedHeader, extractedMetadata.TruncatedHeader)
|
||||
assert.Equal(t, metadata.Header, extractedMetadata.Header)
|
||||
}
|
||||
1164
syft/pkg/cataloger/aiartifact/parse_gguf_test.go
Normal file
1164
syft/pkg/cataloger/aiartifact/parse_gguf_test.go
Normal file
File diff suppressed because it is too large
Load Diff
109
syft/pkg/cataloger/aiartifact/test_helpers_test.go
Normal file
109
syft/pkg/cataloger/aiartifact/test_helpers_test.go
Normal file
@ -0,0 +1,109 @@
|
||||
package aiartifact
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// testGGUFBuilder helps build GGUF files for testing
|
||||
type testGGUFBuilder struct {
|
||||
buf *bytes.Buffer
|
||||
version uint32
|
||||
tensorCount uint64
|
||||
kvPairs []testKVPair
|
||||
}
|
||||
|
||||
type testKVPair struct {
|
||||
key string
|
||||
valueType uint32
|
||||
value interface{}
|
||||
}
|
||||
|
||||
func newTestGGUFBuilder() *testGGUFBuilder {
|
||||
return &testGGUFBuilder{
|
||||
buf: new(bytes.Buffer),
|
||||
version: 3,
|
||||
tensorCount: 100,
|
||||
kvPairs: []testKVPair{},
|
||||
}
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
|
||||
b.version = v
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
|
||||
b.tensorCount = count
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
|
||||
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
|
||||
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
|
||||
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) writeString(s string) {
|
||||
binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
|
||||
b.buf.WriteString(s)
|
||||
}
|
||||
|
||||
func (b *testGGUFBuilder) build() []byte {
|
||||
// Write magic number "GGUF"
|
||||
binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))
|
||||
|
||||
// Write version
|
||||
binary.Write(b.buf, binary.LittleEndian, b.version)
|
||||
|
||||
// Write tensor count
|
||||
binary.Write(b.buf, binary.LittleEndian, b.tensorCount)
|
||||
|
||||
// Write KV count
|
||||
binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))
|
||||
|
||||
// Write KV pairs
|
||||
for _, kv := range b.kvPairs {
|
||||
// Write key
|
||||
b.writeString(kv.key)
|
||||
// Write value type
|
||||
binary.Write(b.buf, binary.LittleEndian, kv.valueType)
|
||||
// Write value based on type
|
||||
switch kv.valueType {
|
||||
case ggufTypeString:
|
||||
b.writeString(kv.value.(string))
|
||||
case ggufTypeUint32:
|
||||
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
|
||||
case ggufTypeUint64:
|
||||
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
|
||||
case ggufTypeUint8:
|
||||
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
|
||||
case ggufTypeInt32:
|
||||
binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
|
||||
case ggufTypeBool:
|
||||
var v uint8
|
||||
if kv.value.(bool) {
|
||||
v = 1
|
||||
}
|
||||
binary.Write(b.buf, binary.LittleEndian, v)
|
||||
}
|
||||
}
|
||||
|
||||
return b.buf.Bytes()
|
||||
}
|
||||
|
||||
// buildInvalidMagic creates a file with invalid magic number
|
||||
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
|
||||
return buf.Bytes()
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user