mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
test: migrate gguf tests over
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
746f00ad68
commit
165611d2e4
385
syft/pkg/cataloger/aiartifact/cataloger_test.go
Normal file
385
syft/pkg/cataloger/aiartifact/cataloger_test.go
Normal file
@ -0,0 +1,385 @@
|
|||||||
|
package aiartifact
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string // returns fixture directory
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in root",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model1.gguf")
|
||||||
|
createTestGGUFInDir(t, dir, "model2.gguf")
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model1.gguf",
|
||||||
|
"model2.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "finds GGUF files in subdirectories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
modelsDir := filepath.Join(dir, "models")
|
||||||
|
os.MkdirAll(modelsDir, 0755)
|
||||||
|
createTestGGUFInDir(t, modelsDir, "llama.gguf")
|
||||||
|
|
||||||
|
deepDir := filepath.Join(dir, "deep", "nested", "path")
|
||||||
|
os.MkdirAll(deepDir, 0755)
|
||||||
|
createTestGGUFInDir(t, deepDir, "mistral.gguf")
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"models/llama.gguf",
|
||||||
|
"deep/nested/path/mistral.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ignores non-GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
createTestGGUFInDir(t, dir, "model.gguf")
|
||||||
|
|
||||||
|
// Create non-GGUF files
|
||||||
|
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expected: []string{
|
||||||
|
"model.gguf",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir)
|
||||||
|
|
||||||
|
if len(tt.expected) > 0 {
|
||||||
|
tester.ExpectsResolverContentQueries(tt.expected)
|
||||||
|
}
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
setup func(t *testing.T) string
|
||||||
|
expectedPackages []pkg.Package
|
||||||
|
expectedRelationships []artifact.Relationship
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "catalog single GGUF file",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(291).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "llama3-8b").
|
||||||
|
withStringKV("general.version", "3.0").
|
||||||
|
withStringKV("general.license", "Apache-2.0").
|
||||||
|
withStringKV("general.quantization", "Q4_K_M").
|
||||||
|
withUint64KV("general.parameter_count", 8030000000).
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||||
|
os.WriteFile(path, data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "llama3-8b",
|
||||||
|
Version: "3.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Licenses: pkg.NewLicenseSet(
|
||||||
|
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||||
|
),
|
||||||
|
Metadata: pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "llama3-8b",
|
||||||
|
ModelVersion: "3.0",
|
||||||
|
License: "Apache-2.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Q4_K_M",
|
||||||
|
Parameters: 8030000000,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 291,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog multiple GGUF files",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create first model
|
||||||
|
data1 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "model1").
|
||||||
|
withStringKV("general.version", "1.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
|
||||||
|
|
||||||
|
// Create second model
|
||||||
|
data2 := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(200).
|
||||||
|
withStringKV("general.architecture", "mistral").
|
||||||
|
withStringKV("general.name", "model2").
|
||||||
|
withStringKV("general.version", "2.0").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
|
||||||
|
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "model1",
|
||||||
|
Version: "1.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model1",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: unkownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 100,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "model2",
|
||||||
|
Version: "2.0",
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model2",
|
||||||
|
ModelVersion: "2.0",
|
||||||
|
Architecture: "mistral",
|
||||||
|
Quantization: unkownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 200,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "catalog GGUF in nested directories",
|
||||||
|
setup: func(t *testing.T) string {
|
||||||
|
dir := t.TempDir()
|
||||||
|
nestedDir := filepath.Join(dir, "models", "quantized")
|
||||||
|
os.MkdirAll(nestedDir, 0755)
|
||||||
|
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(150).
|
||||||
|
withStringKV("general.architecture", "qwen").
|
||||||
|
withStringKV("general.name", "qwen-nested").
|
||||||
|
build()
|
||||||
|
|
||||||
|
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
|
||||||
|
return dir
|
||||||
|
},
|
||||||
|
expectedPackages: []pkg.Package{
|
||||||
|
{
|
||||||
|
Name: "qwen-nested",
|
||||||
|
Version: unkownGGUFData,
|
||||||
|
Type: pkg.ModelPkg,
|
||||||
|
Metadata: pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "qwen-nested",
|
||||||
|
ModelVersion: unkownGGUFData,
|
||||||
|
Architecture: "qwen",
|
||||||
|
Quantization: unkownGGUFData,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 150,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedRelationships: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
fixtureDir := tt.setup(t)
|
||||||
|
|
||||||
|
// Use pkgtest to catalog and compare
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, fixtureDir).
|
||||||
|
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||||
|
IgnoreLocationLayer().
|
||||||
|
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||||
|
WithCompareOptions(
|
||||||
|
// Ignore Hash as it's computed dynamically
|
||||||
|
cmpopts.IgnoreFields(pkg.GGUFFileMetadata{}, "Hash"),
|
||||||
|
)
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create a valid GGUF
|
||||||
|
validData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "valid-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
|
||||||
|
|
||||||
|
// Create an invalid GGUF (wrong magic)
|
||||||
|
invalidData := newTestGGUFBuilder().buildInvalidMagic()
|
||||||
|
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
|
||||||
|
|
||||||
|
// Create a truncated GGUF
|
||||||
|
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
|
||||||
|
|
||||||
|
// Catalog should succeed and only return the valid package
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the valid model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "valid-model", pkgs[0].Name)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Name(t *testing.T) {
|
||||||
|
cataloger := NewGGUFCataloger()
|
||||||
|
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
// Create a subdirectory to ensure glob still runs
|
||||||
|
os.MkdirAll(filepath.Join(dir, "models"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
|
||||||
|
assert.Empty(t, pkgs)
|
||||||
|
assert.Empty(t, rels)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create GGUF file
|
||||||
|
ggufData := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
|
||||||
|
|
||||||
|
// Create other file types
|
||||||
|
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||||
|
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
|
||||||
|
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Should only find the GGUF model
|
||||||
|
require.Len(t, pkgs, 1)
|
||||||
|
assert.Equal(t, "test-model", pkgs[0].Name)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
||||||
|
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
// Create lowercase .gguf
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "lowercase").
|
||||||
|
build()
|
||||||
|
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
|
||||||
|
|
||||||
|
// Create uppercase .GGUF (should not match the glob)
|
||||||
|
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
|
||||||
|
|
||||||
|
tester := pkgtest.NewCatalogTester().
|
||||||
|
FromDirectory(t, dir).
|
||||||
|
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||||
|
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
|
||||||
|
// On case-insensitive filesystems (macOS), both might match
|
||||||
|
// On case-sensitive filesystems (Linux), only lowercase matches
|
||||||
|
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
|
||||||
|
})
|
||||||
|
|
||||||
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
|
}
|
||||||
|
|
||||||
|
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
|
||||||
|
func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
||||||
|
t.Helper()
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withTensorCount(100).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
path := filepath.Join(dir, filename)
|
||||||
|
err := os.WriteFile(path, data, 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
@ -1,9 +1,6 @@
|
|||||||
package aiartifact
|
package aiartifact
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/anchore/packageurl-go"
|
|
||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
)
|
)
|
||||||
@ -12,11 +9,12 @@ func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location)
|
|||||||
p := pkg.Package{
|
p := pkg.Package{
|
||||||
Name: metadata.ModelName,
|
Name: metadata.ModelName,
|
||||||
Version: metadata.ModelVersion,
|
Version: metadata.ModelVersion,
|
||||||
PURL: packageURL(metadata),
|
|
||||||
Locations: file.NewLocationSet(locations...),
|
Locations: file.NewLocationSet(locations...),
|
||||||
Type: pkg.ModelPkg,
|
Type: pkg.ModelPkg,
|
||||||
Licenses: pkg.NewLicenseSet(),
|
Licenses: pkg.NewLicenseSet(),
|
||||||
Metadata: *metadata,
|
Metadata: *metadata,
|
||||||
|
// NOTE: PURL is intentionally not set as the package-url spec
|
||||||
|
// has not yet finalized support for ML model packages
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add license to the package if present in metadata
|
// Add license to the package if present in metadata
|
||||||
@ -28,41 +26,3 @@ func newGGUFPackage(metadata *pkg.GGUFFileMetadata, locations ...file.Location)
|
|||||||
|
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
// packageURL returns the PURL for the specific GGUF model package (see https://github.com/package-url/purl-spec)
|
|
||||||
func packageURL(metadata *pkg.GGUFFileMetadata) string {
|
|
||||||
var qualifiers packageurl.Qualifiers
|
|
||||||
|
|
||||||
// Add model-specific qualifiers
|
|
||||||
if metadata.Architecture != "" {
|
|
||||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
|
||||||
Key: "arch",
|
|
||||||
Value: metadata.Architecture,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
if metadata.Quantization != "" && metadata.Quantization != "unknown" {
|
|
||||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
|
||||||
Key: "quantization",
|
|
||||||
Value: metadata.Quantization,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
if metadata.Parameters > 0 {
|
|
||||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
|
||||||
Key: "parameters",
|
|
||||||
Value: fmt.Sprintf("%d", metadata.Parameters),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use mlmodel as the type for machine learning models in GGUF format
|
|
||||||
// This follows the PURL spec guidance for ML models
|
|
||||||
return packageurl.NewPackageURL(
|
|
||||||
"mlmodel",
|
|
||||||
"gguf",
|
|
||||||
metadata.ModelName,
|
|
||||||
metadata.ModelVersion,
|
|
||||||
qualifiers,
|
|
||||||
"",
|
|
||||||
).ToString()
|
|
||||||
}
|
|
||||||
|
|||||||
185
syft/pkg/cataloger/aiartifact/package_test.go
Normal file
185
syft/pkg/cataloger/aiartifact/package_test.go
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
package aiartifact
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNewGGUFPackage(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
metadata *pkg.GGUFFileMetadata
|
||||||
|
locations []file.Location
|
||||||
|
checkFunc func(t *testing.T, p pkg.Package)
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "complete GGUF package with all fields",
|
||||||
|
metadata: &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "llama3-8b-instruct",
|
||||||
|
ModelVersion: "3.0",
|
||||||
|
License: "Apache-2.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Q4_K_M",
|
||||||
|
Parameters: 8030000000,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 291,
|
||||||
|
Header: map[string]interface{}{},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
},
|
||||||
|
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
assert.Equal(t, "llama3-8b-instruct", p.Name)
|
||||||
|
assert.Equal(t, "3.0", p.Version)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||||
|
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||||
|
assert.Len(t, p.Licenses.ToSlice(), 1)
|
||||||
|
assert.Equal(t, "Apache-2.0", p.Licenses.ToSlice()[0].Value)
|
||||||
|
assert.NotEmpty(t, p.ID())
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "minimal GGUF package",
|
||||||
|
metadata: &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "simple-model",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "gpt2",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 50,
|
||||||
|
},
|
||||||
|
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
assert.Equal(t, "simple-model", p.Name)
|
||||||
|
assert.Equal(t, "1.0", p.Version)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||||
|
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||||
|
assert.Empty(t, p.Licenses.ToSlice())
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "GGUF package with multiple locations",
|
||||||
|
metadata: &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "multi-location-model",
|
||||||
|
ModelVersion: "1.5",
|
||||||
|
Architecture: "llama",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 150,
|
||||||
|
},
|
||||||
|
locations: []file.Location{
|
||||||
|
file.NewLocation("/models/model1.gguf"),
|
||||||
|
file.NewLocation("/models/model2.gguf"),
|
||||||
|
},
|
||||||
|
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||||
|
assert.Len(t, p.Locations.ToSlice(), 2)
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||||
|
|
||||||
|
assert.Equal(t, tt.metadata.ModelName, p.Name)
|
||||||
|
assert.Equal(t, tt.metadata.ModelVersion, p.Version)
|
||||||
|
assert.Equal(t, pkg.ModelPkg, p.Type)
|
||||||
|
|
||||||
|
// Verify metadata is attached
|
||||||
|
metadata, ok := p.Metadata.(pkg.GGUFFileMetadata)
|
||||||
|
require.True(t, ok, "metadata should be GGUFFileMetadata")
|
||||||
|
assert.Equal(t, *tt.metadata, metadata)
|
||||||
|
|
||||||
|
if tt.checkFunc != nil {
|
||||||
|
tt.checkFunc(t, p)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewGGUFPackage_IDUniqueness(t *testing.T) {
|
||||||
|
// Test that different packages get different IDs
|
||||||
|
metadata1 := &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model-1",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 100,
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata2 := &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "model-2",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 100,
|
||||||
|
}
|
||||||
|
|
||||||
|
loc := file.NewLocation("/models/test.gguf")
|
||||||
|
p1 := newGGUFPackage(metadata1, loc)
|
||||||
|
p2 := newGGUFPackage(metadata2, loc)
|
||||||
|
|
||||||
|
assert.NotEqual(t, p1.ID(), p2.ID(), "different packages should have different IDs")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewGGUFPackage_IDConsistency(t *testing.T) {
|
||||||
|
// Test that same metadata produces same ID
|
||||||
|
metadata := &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "test-model",
|
||||||
|
ModelVersion: "1.0",
|
||||||
|
Architecture: "llama",
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 100,
|
||||||
|
}
|
||||||
|
|
||||||
|
loc := file.NewLocation("/models/test.gguf")
|
||||||
|
p1 := newGGUFPackage(metadata, loc)
|
||||||
|
p2 := newGGUFPackage(metadata, loc)
|
||||||
|
|
||||||
|
assert.Equal(t, p1.ID(), p2.ID(), "identical packages should have identical IDs")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewGGUFPackage_MetadataPreservation(t *testing.T) {
|
||||||
|
// Ensure all metadata fields are preserved in the package
|
||||||
|
metadata := &pkg.GGUFFileMetadata{
|
||||||
|
ModelFormat: "gguf",
|
||||||
|
ModelName: "preservation-test",
|
||||||
|
ModelVersion: "2.0",
|
||||||
|
License: "MIT",
|
||||||
|
Architecture: "llama",
|
||||||
|
Quantization: "Q4_K_M",
|
||||||
|
Parameters: 7000000000,
|
||||||
|
GGUFVersion: 3,
|
||||||
|
TensorCount: 219,
|
||||||
|
Hash: "abc123",
|
||||||
|
Header: map[string]interface{}{"custom.field": "value"},
|
||||||
|
TruncatedHeader: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
loc := file.NewLocation("/models/test.gguf")
|
||||||
|
p := newGGUFPackage(metadata, loc)
|
||||||
|
|
||||||
|
extractedMetadata, ok := p.Metadata.(pkg.GGUFFileMetadata)
|
||||||
|
require.True(t, ok)
|
||||||
|
|
||||||
|
assert.Equal(t, metadata.ModelFormat, extractedMetadata.ModelFormat)
|
||||||
|
assert.Equal(t, metadata.ModelName, extractedMetadata.ModelName)
|
||||||
|
assert.Equal(t, metadata.ModelVersion, extractedMetadata.ModelVersion)
|
||||||
|
assert.Equal(t, metadata.License, extractedMetadata.License)
|
||||||
|
assert.Equal(t, metadata.Architecture, extractedMetadata.Architecture)
|
||||||
|
assert.Equal(t, metadata.Quantization, extractedMetadata.Quantization)
|
||||||
|
assert.Equal(t, metadata.Parameters, extractedMetadata.Parameters)
|
||||||
|
assert.Equal(t, metadata.GGUFVersion, extractedMetadata.GGUFVersion)
|
||||||
|
assert.Equal(t, metadata.TensorCount, extractedMetadata.TensorCount)
|
||||||
|
assert.Equal(t, metadata.Hash, extractedMetadata.Hash)
|
||||||
|
assert.Equal(t, metadata.TruncatedHeader, extractedMetadata.TruncatedHeader)
|
||||||
|
assert.Equal(t, metadata.Header, extractedMetadata.Header)
|
||||||
|
}
|
||||||
1164
syft/pkg/cataloger/aiartifact/parse_gguf_test.go
Normal file
1164
syft/pkg/cataloger/aiartifact/parse_gguf_test.go
Normal file
File diff suppressed because it is too large
Load Diff
109
syft/pkg/cataloger/aiartifact/test_helpers_test.go
Normal file
109
syft/pkg/cataloger/aiartifact/test_helpers_test.go
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
package aiartifact
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testGGUFBuilder helps build GGUF files for testing
|
||||||
|
type testGGUFBuilder struct {
|
||||||
|
buf *bytes.Buffer
|
||||||
|
version uint32
|
||||||
|
tensorCount uint64
|
||||||
|
kvPairs []testKVPair
|
||||||
|
}
|
||||||
|
|
||||||
|
// testKVPair is a single key/value entry queued on a testGGUFBuilder.
type testKVPair struct {
	// key is the entry's name.
	key string

	// valueType is the numeric type tag written before the value.
	valueType uint32

	// value holds the payload; its dynamic type is expected to correspond to
	// valueType.
	value interface{}
}
|
||||||
|
|
||||||
|
func newTestGGUFBuilder() *testGGUFBuilder {
|
||||||
|
return &testGGUFBuilder{
|
||||||
|
buf: new(bytes.Buffer),
|
||||||
|
version: 3,
|
||||||
|
tensorCount: 100,
|
||||||
|
kvPairs: []testKVPair{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
|
||||||
|
b.version = v
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
|
||||||
|
b.tensorCount = count
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
|
||||||
|
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
|
||||||
|
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
|
||||||
|
b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) writeString(s string) {
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
|
||||||
|
b.buf.WriteString(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *testGGUFBuilder) build() []byte {
|
||||||
|
// Write magic number "GGUF"
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))
|
||||||
|
|
||||||
|
// Write version
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, b.version)
|
||||||
|
|
||||||
|
// Write tensor count
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, b.tensorCount)
|
||||||
|
|
||||||
|
// Write KV count
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))
|
||||||
|
|
||||||
|
// Write KV pairs
|
||||||
|
for _, kv := range b.kvPairs {
|
||||||
|
// Write key
|
||||||
|
b.writeString(kv.key)
|
||||||
|
// Write value type
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, kv.valueType)
|
||||||
|
// Write value based on type
|
||||||
|
switch kv.valueType {
|
||||||
|
case ggufTypeString:
|
||||||
|
b.writeString(kv.value.(string))
|
||||||
|
case ggufTypeUint32:
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
|
||||||
|
case ggufTypeUint64:
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
|
||||||
|
case ggufTypeUint8:
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
|
||||||
|
case ggufTypeInt32:
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
|
||||||
|
case ggufTypeBool:
|
||||||
|
var v uint8
|
||||||
|
if kv.value.(bool) {
|
||||||
|
v = 1
|
||||||
|
}
|
||||||
|
binary.Write(b.buf, binary.LittleEndian, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return b.buf.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildInvalidMagic creates a file with invalid magic number
|
||||||
|
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user