package aiartifact

import (
	"bytes"
	"context"
	"encoding/binary"
	"io"
	"os"
	"path/filepath"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
)

func TestParseGGUFHeader(t *testing.T) {
	tests := []struct {
		name      string
		buildData func() []byte
		wantMeta  *pkg.GGUFFileMetadata
		wantErr   bool
	}{
		{
			name: "standard GGUF with all fields",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(291).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "llama3-8b-instruct").
					withStringKV("general.version", "3.0").
					withStringKV("general.license", "Apache-2.0").
					withStringKV("general.quantization", "Q4_K_M").
					withUint64KV("general.parameter_count", 8030000000).
					build()
			},
			wantMeta: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "llama3-8b-instruct",
				ModelVersion: "3.0",
				License: "Apache-2.0",
				Architecture: "llama",
				Quantization: "Q4_K_M",
				Parameters: 8030000000,
				GGUFVersion: 3,
				TensorCount: 291,
				Header: map[string]any{},
				TruncatedHeader: false,
			},
		},
		{
			name: "minimal GGUF with only architecture",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(100).
					withStringKV("general.architecture", "qwen").
					withStringKV("general.name", "qwen2-1.5b").
					build()
			},
			wantMeta: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "qwen2-1.5b",
				ModelVersion: unkownGGUFData,
				Architecture: "qwen",
				Quantization: unkownGGUFData,
				GGUFVersion: 3,
				TensorCount: 100,
				Header: map[string]any{},
				TruncatedHeader: false,
			},
		},
		{
			name: "GGUF v2 (older version)",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(2).
					withTensorCount(50).
					withStringKV("general.architecture", "gpt2").
					withStringKV("general.name", "gpt2-small").
					build()
			},
			wantMeta: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "gpt2-small",
				ModelVersion: unkownGGUFData,
				Architecture: "gpt2",
				Quantization: unkownGGUFData,
				GGUFVersion: 2,
				TensorCount: 50,
				Header: map[string]any{},
				TruncatedHeader: false,
			},
		},
		{
			name: "GGUF without general.name falls back to location",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(150).
					withStringKV("general.architecture", "llama").
					withStringKV("general.license", "MIT").
					build()
			},
			wantMeta: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "test-model", // will be extracted from location
				ModelVersion: unkownGGUFData,
				Architecture: "llama",
				License: "MIT",
				Quantization: unkownGGUFData,
				GGUFVersion: 3,
				TensorCount: 150,
				Header: map[string]any{},
				TruncatedHeader: false,
			},
		},
		{
			name: "GGUF with extra metadata fields in header",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(200).
					withStringKV("general.architecture", "mistral").
					withStringKV("general.name", "mistral-7b").
					withStringKV("llama.attention.head_count", "32").
					withStringKV("llama.embedding_length", "4096").
					build()
			},
			wantMeta: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "mistral-7b",
				ModelVersion: unkownGGUFData,
				Architecture: "mistral",
				Quantization: unkownGGUFData,
				GGUFVersion: 3,
				TensorCount: 200,
				Header: map[string]any{
					"llama.attention.head_count": "32",
					"llama.embedding_length": "4096",
				},
				TruncatedHeader: false,
			},
		},
		{
			name: "invalid magic number",
			buildData: func() []byte {
				return newTestGGUFBuilder().buildInvalidMagic()
			},
			wantErr: true,
		},
		{
			name: "truncated file (too small)",
			buildData: func() []byte {
				return []byte{0x47, 0x47}
			},
			wantErr: true,
		},
		{
			name: "empty file",
			buildData: func() []byte {
				return []byte{}
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			got, err := parseGGUFHeader(data, "/path/to/test-model.gguf")

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			require.NotNil(t, got)

			// Don't compare Hash as it's computed
			assert.Equal(t, tt.wantMeta.ModelFormat, got.ModelFormat)
			assert.Equal(t, tt.wantMeta.ModelVersion, got.ModelVersion)
			assert.Equal(t, tt.wantMeta.Architecture, got.Architecture)
			assert.Equal(t, tt.wantMeta.Quantization, got.Quantization)
			assert.Equal(t, tt.wantMeta.GGUFVersion, got.GGUFVersion)
			assert.Equal(t, tt.wantMeta.TensorCount, got.TensorCount)
			assert.Equal(t, tt.wantMeta.Parameters, got.Parameters)
			assert.Equal(t, tt.wantMeta.License, got.License)
			assert.Equal(t, tt.wantMeta.TruncatedHeader, got.TruncatedHeader)

			// For the case without general.name, check that filename was used
			if tt.name == "GGUF without general.name falls back to location" {
				assert.Equal(t, "test-model", got.ModelName)
			} else if tt.wantMeta.ModelName != "" {
				assert.Equal(t, tt.wantMeta.ModelName, got.ModelName)
			}

			// Check Header map
			for k, v := range tt.wantMeta.Header {
				assert.Equal(t, v, got.Header[k], "Header key %s mismatch", k)
			}

			// Hash should be computed
			if !tt.wantErr {
				assert.NotEmpty(t, got.Hash)
			}
		})
	}
}

func TestReadValue(t *testing.T) {
	tests := []struct {
		name      string
		valueType uint32
		buildData func() []byte
		want      interface{}
		wantErr   bool
	}{
		{
			name: "uint8",
			valueType: ggufTypeUint8,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint8(42))
				return buf.Bytes()
			},
			want: uint8(42),
		},
		{
			name: "int8",
			valueType: ggufTypeInt8,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, int8(-42))
				return buf.Bytes()
			},
			want: int8(-42),
		},
		{
			name: "uint32",
			valueType: ggufTypeUint32,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint32(12345))
				return buf.Bytes()
			},
			want: uint32(12345),
		},
		{
			name: "uint64",
			valueType: ggufTypeUint64,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint64(9876543210))
				return buf.Bytes()
			},
			want: uint64(9876543210),
		},
		{
			name: "float32",
			valueType: ggufTypeFloat32,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, float32(3.14159))
				return buf.Bytes()
			},
			want: float32(3.14159),
		},
		{
			name: "bool true",
			valueType: ggufTypeBool,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint8(1))
				return buf.Bytes()
			},
			want: true,
		},
		{
			name: "bool false",
			valueType: ggufTypeBool,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint8(0))
				return buf.Bytes()
			},
			want: false,
		},
		{
			name: "string",
			valueType: ggufTypeString,
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				s := "hello world"
				binary.Write(buf, binary.LittleEndian, uint64(len(s)))
				buf.WriteString(s)
				return buf.Bytes()
			},
			want: "hello world",
		},
		{
			name: "unknown type",
			valueType: 99,
			buildData: func() []byte {
				return []byte{}
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			reader := bytes.NewReader(data)

			got, err := readValue(reader, tt.valueType)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tt.want, got)
		})
	}
}

func TestReadString(t *testing.T) {
	tests := []struct {
		name      string
		buildData func() []byte
		want      string
		wantErr   bool
	}{
		{
			name: "normal string",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				s := "test string"
				binary.Write(buf, binary.LittleEndian, uint64(len(s)))
				buf.WriteString(s)
				return buf.Bytes()
			},
			want: "test string",
		},
		{
			name: "empty string",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint64(0))
				return buf.Bytes()
			},
			want: "",
		},
		{
			name: "string exceeds max length",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint64(maxKeyLen+1))
				return buf.Bytes()
			},
			wantErr: true,
		},
		{
			name: "truncated string data",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint64(100))
				buf.WriteString("short")
				return buf.Bytes()
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			reader := bytes.NewReader(data)

			got, err := readString(reader)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tt.want, got)
		})
	}
}

func TestReadArray(t *testing.T) {
	tests := []struct {
		name      string
		buildData func() []byte
		want      interface{}
		wantErr   bool
	}{
		{
			name: "array of uint32",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint32(ggufTypeUint32)) // element type
				binary.Write(buf, binary.LittleEndian, uint64(3))              // array length
				binary.Write(buf, binary.LittleEndian, uint32(1))
				binary.Write(buf, binary.LittleEndian, uint32(2))
				binary.Write(buf, binary.LittleEndian, uint32(3))
				return buf.Bytes()
			},
			want: []interface{}{uint32(1), uint32(2), uint32(3)},
		},
		{
			name: "empty array",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint32(ggufTypeUint32))
				binary.Write(buf, binary.LittleEndian, uint64(0))
				return buf.Bytes()
			},
			want: ([]interface{})(nil), // Empty array returns nil slice
		},
		{
			name: "array too large",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint32(ggufTypeUint32))
				binary.Write(buf, binary.LittleEndian, uint64(10000))
				return buf.Bytes()
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			reader := bytes.NewReader(data)

			got, err := readArray(reader)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tt.want, got)
		})
	}
}

func TestReadKVPair(t *testing.T) {
	tests := []struct {
		name      string
		buildData func() []byte
		wantKey   string
		wantValue interface{}
		wantErr   bool
	}{
		{
			name: "string key-value pair",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				// Write key
				key := "general.name"
				binary.Write(buf, binary.LittleEndian, uint64(len(key)))
				buf.WriteString(key)
				// Write value type
				binary.Write(buf, binary.LittleEndian, uint32(ggufTypeString))
				// Write value
				value := "test-model"
				binary.Write(buf, binary.LittleEndian, uint64(len(value)))
				buf.WriteString(value)
				return buf.Bytes()
			},
			wantKey: "general.name",
			wantValue: "test-model",
		},
		{
			name: "uint64 key-value pair",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				// Write key
				key := "general.parameter_count"
				binary.Write(buf, binary.LittleEndian, uint64(len(key)))
				buf.WriteString(key)
				// Write value type
				binary.Write(buf, binary.LittleEndian, uint32(ggufTypeUint64))
				// Write value
				binary.Write(buf, binary.LittleEndian, uint64(7000000000))
				return buf.Bytes()
			},
			wantKey: "general.parameter_count",
			wantValue: uint64(7000000000),
		},
		{
			name: "key too long",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint64(maxKeyLen+1))
				return buf.Bytes()
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			reader := bytes.NewReader(data)

			gotKey, gotValue, err := readKVPair(reader)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tt.wantKey, gotKey)
			assert.Equal(t, tt.wantValue, gotValue)
		})
	}
}

func TestInferQuantizationFromFilename(t *testing.T) {
	tests := []struct {
		name     string
		filename string
		want     string
	}{
		{
			name: "Q4_K_M quantization",
			filename: "/path/to/model-Q4_K_M.gguf",
			want: "Q4_K_M",
		},
		{
			name: "IQ4_NL quantization",
			filename: "/path/to/model-IQ4_NL.gguf",
			want: "Q4_NL", // The regex [IQ]\d+_[A-Z_]+ matches Q4_NL from IQ4_NL
		},
		{
			name: "Q5_K_S quantization",
			filename: "mistral-7b-Q5_K_S.gguf",
			want: "Q5_K_S",
		},
		{
			name: "no quantization in filename",
			filename: "/path/to/model.gguf",
			want: unkownGGUFData,
		},
		{
			name: "partial match should not match",
			filename: "/path/to/Q4-model.gguf",
			want: unkownGGUFData,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := inferQuantizationFromFilename(tt.filename)
			assert.Equal(t, tt.want, got)
		})
	}
}

func TestComputeMetadataHash(t *testing.T) {
	tests := []struct {
		name     string
		metadata *pkg.GGUFFileMetadata
		wantLen  int
	}{
		{
			name: "hash should be consistent",
			metadata: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "test-model",
				ModelVersion: "1.0",
				Architecture: "llama",
				GGUFVersion: 3,
				TensorCount: 100,
			},
			wantLen: 16, // 8 bytes = 16 hex chars
		},
		{
			name: "different metadata produces different hash",
			metadata: &pkg.GGUFFileMetadata{
				ModelFormat: "gguf",
				ModelName: "different-model",
				ModelVersion: "2.0",
				Architecture: "gpt2",
				GGUFVersion: 2,
				TensorCount: 200,
			},
			wantLen: 16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			hash := computeMetadataHash(tt.metadata)
			assert.Len(t, hash, tt.wantLen)
			assert.NotEmpty(t, hash)
		})
	}

	// Test that same metadata produces same hash
	meta1 := &pkg.GGUFFileMetadata{
		ModelFormat: "gguf",
		ModelName: "test",
		ModelVersion: "1.0",
		Architecture: "llama",
		GGUFVersion: 3,
		TensorCount: 100,
	}
	meta2 := &pkg.GGUFFileMetadata{
		ModelFormat: "gguf",
		ModelName: "test",
		ModelVersion: "1.0",
		Architecture: "llama",
		GGUFVersion: 3,
		TensorCount: 100,
	}
	hash1 := computeMetadataHash(meta1)
	hash2 := computeMetadataHash(meta2)
	assert.Equal(t, hash1, hash2, "identical metadata should produce identical hash")

	// Test that different metadata produces different hash
	meta3 := &pkg.GGUFFileMetadata{
		ModelFormat: "gguf",
		ModelName: "different",
		ModelVersion: "1.0",
		Architecture: "llama",
		GGUFVersion: 3,
		TensorCount: 100,
	}
	hash3 := computeMetadataHash(meta3)
	assert.NotEqual(t, hash1, hash3, "different metadata should produce different hash")
}

func TestParseGGUFHeader_EdgeCases(t *testing.T) {
	tests := []struct {
		name      string
		buildData func() []byte
		location  string
		wantErr   bool
		checkFunc func(t *testing.T, meta *pkg.GGUFFileMetadata)
	}{
		{
			name: "excessive KV pairs should error",
			buildData: func() []byte {
				buf := new(bytes.Buffer)
				binary.Write(buf, binary.LittleEndian, uint32(ggufMagic))
				binary.Write(buf, binary.LittleEndian, uint32(3))
				binary.Write(buf, binary.LittleEndian, uint64(100))
				binary.Write(buf, binary.LittleEndian, uint64(maxKVPairs+1)) // Too many
				return buf.Bytes()
			},
			wantErr: true,
		},
		{
			name: "tensor count at maximum should succeed",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(maxTensors).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "large-model").
					build()
			},
			wantErr: false,
			checkFunc: func(t *testing.T, meta *pkg.GGUFFileMetadata) {
				assert.Equal(t, uint64(maxTensors), meta.TensorCount)
			},
		},
		{
			name: "tensor count exceeds maximum should be capped",
			buildData: func() []byte {
				return newTestGGUFBuilder().
					withVersion(3).
					withTensorCount(maxTensors+1000).
					withStringKV("general.architecture", "llama").
					withStringKV("general.name", "huge-model").
					build()
			},
			wantErr: false,
			checkFunc: func(t *testing.T, meta *pkg.GGUFFileMetadata) {
				assert.Equal(t, uint64(maxTensors), meta.TensorCount)
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			data := tt.buildData()
			location := tt.location
			if location == "" {
				location = "/test/path.gguf"
			}

			got, err := parseGGUFHeader(data, location)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			if tt.checkFunc != nil {
				tt.checkFunc(t, got)
			}
		})
	}
}

func TestReadValue_EOF(t *testing.T) {
	// Test that reading beyond available data returns appropriate errors
	tests := []struct {
		name      string
		valueType uint32
		data      []byte
	}{
		{
			name: "EOF reading uint32",
			valueType: ggufTypeUint32,
			data: []byte{0x01}, // Only 1 byte, need 4
		},
		{
			name: "EOF reading string length",
			valueType: ggufTypeString,
			data: []byte{0x01}, // Incomplete length
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			reader := bytes.NewReader(tt.data)
			_, err := readValue(reader, tt.valueType)
			assert.Error(t, err)
			assert.True(t, err == io.EOF || err == io.ErrUnexpectedEOF || bytes.Contains([]byte(err.Error()), []byte("EOF")))
		})
	}
}

// ============================================================================
// Integration Tests for parseGGUFModel
// ============================================================================

func TestParseGGUFModel(t *testing.T) {
	tests := []struct {
		name                  string
		fixture               func(t *testing.T) string // returns path to temp fixture
		expectedPackages      []pkg.Package
		expectedRelationships []artifact.Relationship
		wantErr               bool
	}{
		{
			name: "valid GGUF with complete metadata",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "llama3-8b-q4.gguf",
					newTestGGUFBuilder().
						withVersion(3).
						withTensorCount(291).
						withStringKV("general.architecture", "llama").
						withStringKV("general.name", "llama3-8b-instruct").
						withStringKV("general.version", "3.0").
						withStringKV("general.license", "Apache-2.0").
						withStringKV("general.quantization", "Q4_K_M").
						withUint64KV("general.parameter_count", 8030000000).
						build(),
				)
			},
			expectedPackages: []pkg.Package{
				{
					Name: "llama3-8b-instruct",
					Version: "3.0",
					Type: pkg.ModelPkg,
					Metadata: pkg.GGUFFileMetadata{
						ModelFormat: "gguf",
						ModelName: "llama3-8b-instruct",
						ModelVersion: "3.0",
						License: "Apache-2.0",
						Architecture: "llama",
						Quantization: "Q4_K_M",
						Parameters: 8030000000,
						GGUFVersion: 3,
						TensorCount: 291,
						Header: map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
		},
		{
			name: "minimal GGUF file",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "minimal.gguf",
					newTestGGUFBuilder().
						withVersion(3).
						withTensorCount(100).
						withStringKV("general.architecture", "qwen").
						withStringKV("general.name", "qwen2-1.5b").
						build(),
				)
			},
			expectedPackages: []pkg.Package{
				{
					Name: "qwen2-1.5b",
					Version: unkownGGUFData,
					Type: pkg.ModelPkg,
					Metadata: pkg.GGUFFileMetadata{
						ModelFormat: "gguf",
						ModelName: "qwen2-1.5b",
						ModelVersion: unkownGGUFData,
						Architecture: "qwen",
						Quantization: unkownGGUFData,
						GGUFVersion: 3,
						TensorCount: 100,
						Header: map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
		},
		{
			name: "GGUF without general.name uses filename",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "inferred-name-model.gguf",
					newTestGGUFBuilder().
						withVersion(3).
						withTensorCount(150).
						withStringKV("general.architecture", "llama").
						withStringKV("general.license", "MIT").
						build(),
				)
			},
			expectedPackages: []pkg.Package{
				{
					Name: "inferred-name-model",
					Version: unkownGGUFData,
					Type: pkg.ModelPkg,
					Metadata: pkg.GGUFFileMetadata{
						ModelFormat: "gguf",
						ModelName: "inferred-name-model",
						ModelVersion: unkownGGUFData,
						License: "MIT",
						Architecture: "llama",
						Quantization: unkownGGUFData,
						GGUFVersion: 3,
						TensorCount: 150,
						Header: map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
		},
		{
			name: "GGUF with quantization inferred from filename",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "mistral-7b-Q4_K_M.gguf",
					newTestGGUFBuilder().
						withVersion(3).
						withTensorCount(219).
						withStringKV("general.architecture", "mistral").
						withStringKV("general.name", "mistral-7b-instruct").
						withStringKV("general.version", "0.2").
						withUint64KV("general.parameter_count", 7240000000).
						withStringKV("general.quantized_by", "llama.cpp"). // Triggers filename inference
						build(),
				)
			},
			expectedPackages: []pkg.Package{
				{
					Name: "mistral-7b-instruct",
					Version: "0.2",
					Type: pkg.ModelPkg,
					Metadata: pkg.GGUFFileMetadata{
						ModelFormat: "gguf",
						ModelName: "mistral-7b-instruct",
						ModelVersion: "0.2",
						Architecture: "mistral",
						Quantization: "Q4_K_M",
						Parameters: 7240000000,
						GGUFVersion: 3,
						TensorCount: 219,
						Header: map[string]interface{}{},
						TruncatedHeader: false,
					},
				},
			},
		},
		{
			name: "invalid GGUF magic number",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "invalid-magic.gguf",
					newTestGGUFBuilder().buildInvalidMagic(),
				)
			},
			wantErr: true,
		},
		{
			name: "truncated GGUF file",
			fixture: func(t *testing.T) string {
				return createTempGGUFFile(t, "truncated.gguf", []byte{0x47, 0x47})
			},
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			fixturePath := tt.fixture(t)
			defer os.Remove(fixturePath)

			f, err := os.Open(fixturePath)
			require.NoError(t, err)
			defer f.Close()

			location := file.NewLocation(fixturePath)
			reader := file.LocationReadCloser{
				Location: location,
				ReadCloser: f,
			}

			ctx := context.Background()
			pkgs, relationships, err := parseGGUFModel(ctx, nil, nil, reader)

			if tt.wantErr {
				require.Error(t, err)
				return
			}

			require.NoError(t, err)
			require.Len(t, pkgs, len(tt.expectedPackages))

			// Compare packages (ignoring Hash which is computed)
			for i, expectedPkg := range tt.expectedPackages {
				actualPkg := pkgs[i]

				assert.Equal(t, expectedPkg.Name, actualPkg.Name)
				assert.Equal(t, expectedPkg.Version, actualPkg.Version)
				assert.Equal(t, expectedPkg.Type, actualPkg.Type)
				assert.Empty(t, actualPkg.PURL, "PURL should not be set for model packages")

				// Check metadata
				actualMeta, ok := actualPkg.Metadata.(pkg.GGUFFileMetadata)
				require.True(t, ok)
				expectedMeta := expectedPkg.Metadata.(pkg.GGUFFileMetadata)

				assert.Equal(t, expectedMeta.ModelFormat, actualMeta.ModelFormat)
				assert.Equal(t, expectedMeta.ModelName, actualMeta.ModelName)
				assert.Equal(t, expectedMeta.ModelVersion, actualMeta.ModelVersion)
				assert.Equal(t, expectedMeta.License, actualMeta.License)
				assert.Equal(t, expectedMeta.Architecture, actualMeta.Architecture)
				assert.Equal(t, expectedMeta.Quantization, actualMeta.Quantization)
				assert.Equal(t, expectedMeta.Parameters, actualMeta.Parameters)
				assert.Equal(t, expectedMeta.GGUFVersion, actualMeta.GGUFVersion)
				assert.Equal(t, expectedMeta.TensorCount, actualMeta.TensorCount)
				assert.Equal(t, expectedMeta.TruncatedHeader, actualMeta.TruncatedHeader)

				// Hash should be computed
				assert.NotEmpty(t, actualMeta.Hash)
			}

			assert.Equal(t, tt.expectedRelationships, relationships)
		})
	}
}

func TestParseGGUFModel_HeaderReadLimit(t *testing.T) {
	builder := newTestGGUFBuilder().
		withVersion(3).
		withTensorCount(100).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "large-header-model")

	// Add many additional fields
	for i := 0; i < 50; i++ {
		builder.withStringKV("custom.field"+string(rune(i)), "value")
	}

	fixturePath := createTempGGUFFile(t, "large-header.gguf", builder.build())
	defer os.Remove(fixturePath)

	f, err := os.Open(fixturePath)
	require.NoError(t, err)
	defer f.Close()

	reader := file.LocationReadCloser{
		Location: file.NewLocation(fixturePath),
		ReadCloser: f,
	}

	ctx := context.Background()
	pkgs, _, err := parseGGUFModel(ctx, nil, nil, reader)

	require.NoError(t, err)
	require.Len(t, pkgs, 1)
	assert.Equal(t, "large-header-model", pkgs[0].Name)
}

func TestParseGGUFModel_EmptyFile(t *testing.T) {
	tmpDir := t.TempDir()
	fixturePath := filepath.Join(tmpDir, "empty.gguf")
	err := os.WriteFile(fixturePath, []byte{}, 0644)
	require.NoError(t, err)

	f, err := os.Open(fixturePath)
	require.NoError(t, err)
	defer f.Close()

	reader := file.LocationReadCloser{
		Location: file.NewLocation(fixturePath),
		ReadCloser: f,
	}

	ctx := context.Background()
	_, _, err = parseGGUFModel(ctx, nil, nil, reader)

	require.Error(t, err)
	assert.Contains(t, err.Error(), "too small")
}

func TestParseGGUFModel_LargeFile(t *testing.T) {
	// Test that we only read the header, not the entire file
	builder := newTestGGUFBuilder().
		withVersion(3).
		withTensorCount(100).
		withStringKV("general.architecture", "llama").
		withStringKV("general.name", "test-model")

	headerData := builder.build()

	// Create a file with header + large padding
	tmpDir := t.TempDir()
	fixturePath := filepath.Join(tmpDir, "large.gguf")
	f, err := os.Create(fixturePath)
	require.NoError(t, err)

	_, err = f.Write(headerData)
	require.NoError(t, err)

	// Write 20MB of padding (simulating tensor data)
	padding := make([]byte, 20*1024*1024)
	_, err = f.Write(padding)
	require.NoError(t, err)
	f.Close()

	// Parse the file
	f, err = os.Open(fixturePath)
	require.NoError(t, err)
	defer f.Close()

	reader := file.LocationReadCloser{
		Location: file.NewLocation(fixturePath),
		ReadCloser: f,
	}

	ctx := context.Background()
	pkgs, _, err := parseGGUFModel(ctx, nil, nil, reader)

	require.NoError(t, err)
	require.Len(t, pkgs, 1)
	assert.Equal(t, "test-model", pkgs[0].Name)
}

func Test_parseGGUFModel_interface(t *testing.T) {
	// This test ensures parseGGUFModel matches the generic.Parser signature
	fixture := createTempGGUFFile(t, "interface-test.gguf",
		newTestGGUFBuilder().
			withVersion(3).
			withTensorCount(100).
			withStringKV("general.architecture", "llama").
			withStringKV("general.name", "test").
			build(),
	)
	defer os.Remove(fixture)

	f, err := os.Open(fixture)
	require.NoError(t, err)
	defer f.Close()

	reader := file.LocationReadCloser{
		Location: file.NewLocation(fixture),
		ReadCloser: f,
	}

	ctx := context.Background()
	pkgs, rels, err := parseGGUFModel(ctx, nil, nil, reader)
	require.NoError(t, err)
	require.Len(t, pkgs, 1)
	assert.Empty(t, rels)

	// Verify basic package structure
	assert.Equal(t, "test", pkgs[0].Name)
	assert.Equal(t, unkownGGUFData, pkgs[0].Version)
	assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
}

func TestParseGGUFModel_ReaderClosed(t *testing.T) {
	// Ensure the reader is properly closed after parsing
	fixture := createTempGGUFFile(t, "close-test.gguf",
		newTestGGUFBuilder().
			withVersion(3).
			withTensorCount(100).
			withStringKV("general.architecture", "llama").
			withStringKV("general.name", "test").
			build(),
	)
	defer os.Remove(fixture)

	f, err := os.Open(fixture)
	require.NoError(t, err)

	// Wrap in a custom closer to track if Close was called
	closeCalled := false
	reader := file.LocationReadCloser{
		Location: file.NewLocation(fixture),
		ReadCloser: &testReadCloser{
			Reader: f,
			onClose: func() error {
				closeCalled = true
				return f.Close()
			},
		},
	}

	ctx := context.Background()
	_, _, err = parseGGUFModel(ctx, nil, nil, reader)
	require.NoError(t, err)

	assert.True(t, closeCalled, "reader should be closed after parsing")
}

// createTempGGUFFile creates a temporary GGUF file for testing
func createTempGGUFFile(t *testing.T, filename string, data []byte) string {
	t.Helper()
	tmpDir := t.TempDir()
	path := filepath.Join(tmpDir, filename)
	err := os.WriteFile(path, data, 0644)
	require.NoError(t, err)
	return path
}

// testReadCloser wraps an io.Reader and tracks Close calls
type testReadCloser struct {
	io.Reader
	onClose func() error
}

func (r *testReadCloser) Close() error {
	if r.onClose != nil {
		return r.onClose()
	}
	return nil
}
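
// The newTestGGUFBuilder helper used throughout these tests is defined in a
// separate test file in this package. The sketch below is an assumption based
// only on how the builder is called here and on the byte layouts exercised by
// TestParseGGUFHeader_EdgeCases and TestReadKVPair; it is not the actual
// implementation, and all names and fields shown are hypothetical. The header
// it emits appears to be little-endian:
//
//	magic (uint32, ggufMagic) | version (uint32) | tensor count (uint64) |
//	KV count (uint64) | repeated KV pairs, each encoded as
//	key length (uint64) + key bytes + value type (uint32) + value
//
//	type testGGUFBuilder struct {
//		version     uint32
//		tensorCount uint64
//		kvCount     uint64
//		kvs         []byte // pre-encoded KV pairs
//	}
//
//	func newTestGGUFBuilder() *testGGUFBuilder { return &testGGUFBuilder{version: 3} }
//
//	func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
//		buf := new(bytes.Buffer)
//		binary.Write(buf, binary.LittleEndian, uint64(len(key)))
//		buf.WriteString(key)
//		binary.Write(buf, binary.LittleEndian, uint32(ggufTypeString))
//		binary.Write(buf, binary.LittleEndian, uint64(len(value)))
//		buf.WriteString(value)
//		b.kvs = append(b.kvs, buf.Bytes()...)
//		b.kvCount++
//		return b
//	}
//
//	func (b *testGGUFBuilder) build() []byte {
//		buf := new(bytes.Buffer)
//		binary.Write(buf, binary.LittleEndian, uint32(ggufMagic))
//		binary.Write(buf, binary.LittleEndian, b.version)
//		binary.Write(buf, binary.LittleEndian, b.tensorCount)
//		binary.Write(buf, binary.LittleEndian, b.kvCount)
//		buf.Write(b.kvs)
//		return buf.Bytes()
//	}
//
// The other with* methods presumably follow the same pattern (withUint64KV
// writing ggufTypeUint64 values, withVersion/withTensorCount setting header
// fields), and buildInvalidMagic presumably emits the same layout with a magic
// value other than ggufMagic, which is what the "invalid magic number" cases
// rely on.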