mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
wip: wip
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
9a2a45f91d
commit
9b31c0480f
16
.claude/settings.local.json
Normal file
16
.claude/settings.local.json
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(go test:*)",
|
||||||
|
"Bash(git stash:*)",
|
||||||
|
"Bash(go doc:*)",
|
||||||
|
"Read(//Users/hal/go/pkg/mod/github.com/gpustack/gguf-parser-go@v0.22.1/**)",
|
||||||
|
"Bash(timeout 5 go test:*)",
|
||||||
|
"Bash(timeout 10 go test:*)",
|
||||||
|
"Bash(timeout:*)",
|
||||||
|
"Bash(cat:*)"
|
||||||
|
],
|
||||||
|
"deny": [],
|
||||||
|
"ask": []
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -96,7 +96,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(291).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "llama3-8b").
|
withStringKV("general.name", "llama3-8b").
|
||||||
withStringKV("general.version", "3.0").
|
withStringKV("general.version", "3.0").
|
||||||
@ -123,10 +122,10 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelVersion: "3.0",
|
ModelVersion: "3.0",
|
||||||
License: "Apache-2.0",
|
License: "Apache-2.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: "Q4_K_M",
|
Quantization: "Unknown",
|
||||||
Parameters: 8030000000,
|
Parameters: 0,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 291,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -142,7 +141,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
// Create first model
|
// Create first model
|
||||||
data1 := newTestGGUFBuilder().
|
data1 := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "model1").
|
withStringKV("general.name", "model1").
|
||||||
withStringKV("general.version", "1.0").
|
withStringKV("general.version", "1.0").
|
||||||
@ -152,7 +150,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
// Create second model
|
// Create second model
|
||||||
data2 := newTestGGUFBuilder().
|
data2 := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(200).
|
|
||||||
withStringKV("general.architecture", "mistral").
|
withStringKV("general.architecture", "mistral").
|
||||||
withStringKV("general.name", "model2").
|
withStringKV("general.name", "model2").
|
||||||
withStringKV("general.version", "2.0").
|
withStringKV("general.version", "2.0").
|
||||||
@ -171,9 +168,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model1",
|
ModelName: "model1",
|
||||||
ModelVersion: "1.0",
|
ModelVersion: "1.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 100,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -187,9 +184,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model2",
|
ModelName: "model2",
|
||||||
ModelVersion: "2.0",
|
ModelVersion: "2.0",
|
||||||
Architecture: "mistral",
|
Architecture: "mistral",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 200,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -206,7 +203,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
|
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(150).
|
|
||||||
withStringKV("general.architecture", "qwen").
|
withStringKV("general.architecture", "qwen").
|
||||||
withStringKV("general.name", "qwen-nested").
|
withStringKV("general.name", "qwen-nested").
|
||||||
build()
|
build()
|
||||||
@ -224,9 +220,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "qwen-nested",
|
ModelName: "qwen-nested",
|
||||||
ModelVersion: unknownGGUFData,
|
ModelVersion: unknownGGUFData,
|
||||||
Architecture: "qwen",
|
Architecture: "qwen",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 150,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -262,7 +258,6 @@ func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
|||||||
// Create a valid GGUF
|
// Create a valid GGUF
|
||||||
validData := newTestGGUFBuilder().
|
validData := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "valid-model").
|
withStringKV("general.name", "valid-model").
|
||||||
build()
|
build()
|
||||||
@ -313,7 +308,6 @@ func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
|||||||
// Create GGUF file
|
// Create GGUF file
|
||||||
ggufData := newTestGGUFBuilder().
|
ggufData := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "test-model").
|
withStringKV("general.name", "test-model").
|
||||||
build()
|
build()
|
||||||
@ -344,7 +338,6 @@ func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
|||||||
// Create lowercase .gguf
|
// Create lowercase .gguf
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "lowercase").
|
withStringKV("general.name", "lowercase").
|
||||||
build()
|
build()
|
||||||
@ -370,7 +363,6 @@ func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
|||||||
t.Helper()
|
t.Helper()
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "test-model").
|
withStringKV("general.name", "test-model").
|
||||||
build()
|
build()
|
||||||
|
|||||||
@ -21,6 +21,7 @@ type ggufHeaderReader struct {
|
|||||||
|
|
||||||
// readHeader reads only the GGUF header (metadata) without reading tensor data
|
// readHeader reads only the GGUF header (metadata) without reading tensor data
|
||||||
// This is much more efficient than reading the entire file
|
// This is much more efficient than reading the entire file
|
||||||
|
// The reader should be wrapped with io.LimitedReader to prevent OOM issues
|
||||||
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
||||||
// Read initial chunk to determine header size
|
// Read initial chunk to determine header size
|
||||||
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
|
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
|
||||||
@ -36,19 +37,20 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We need to read the metadata KV pairs to know the full header size
|
// We need to read the metadata KV pairs to know the full header size
|
||||||
// For efficiency, we'll read incrementally up to maxHeaderSize
|
// The io.LimitedReader wrapping this reader ensures we don't read more than maxHeaderSize
|
||||||
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
|
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
|
||||||
headerData = append(headerData, initialBuf...)
|
headerData = append(headerData, initialBuf...)
|
||||||
|
|
||||||
// Read the rest of the header in larger chunks for efficiency
|
// Read the rest of the header in larger chunks for efficiency
|
||||||
|
// The LimitedReader will return EOF once maxHeaderSize is reached
|
||||||
buf := make([]byte, 64*1024) // 64KB chunks
|
buf := make([]byte, 64*1024) // 64KB chunks
|
||||||
for len(headerData) < maxHeaderSize {
|
for {
|
||||||
n, err := r.reader.Read(buf)
|
n, err := r.reader.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
headerData = append(headerData, buf[:n]...)
|
headerData = append(headerData, buf[:n]...)
|
||||||
}
|
}
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
// Reached end of file, we have all the data
|
// Reached end of file or limit, we have all available data
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -56,11 +58,6 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(headerData) > maxHeaderSize {
|
|
||||||
// Truncate if we somehow read too much
|
|
||||||
headerData = headerData[:maxHeaderSize]
|
|
||||||
}
|
|
||||||
|
|
||||||
return headerData, nil
|
return headerData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package ai
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@ -24,26 +25,28 @@ const unknownGGUFData = "unknown"
|
|||||||
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
defer internal.CloseAndLogError(reader, reader.Path())
|
defer internal.CloseAndLogError(reader, reader.Path())
|
||||||
|
|
||||||
// Read only the header portion (not the entire file)
|
// Read and validate the GGUF file header using LimitedReader to prevent OOM
|
||||||
headerReader := &ggufHeaderReader{reader: reader}
|
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
|
||||||
|
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
|
||||||
|
headerReader := &ggufHeaderReader{reader: limitedReader}
|
||||||
headerData, err := headerReader.readHeader()
|
headerData, err := headerReader.readHeader()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a temporary file with just the header for the library to parse
|
// Create a temporary file for the library to parse
|
||||||
// The library requires a file path, so we create a minimal temp file
|
// The library requires a file path, so we create a temp file
|
||||||
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
|
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
}
|
}
|
||||||
tempPath := tempFile.Name()
|
tempPath := tempFile.Name()
|
||||||
defer os.Remove(tempPath)
|
defer os.Remove(tempPath)
|
||||||
|
|
||||||
// Write header data to temp file
|
// Write the validated header data to temp file
|
||||||
if _, err := tempFile.Write(headerData); err != nil {
|
if _, err := tempFile.Write(headerData); err != nil {
|
||||||
tempFile.Close()
|
tempFile.Close()
|
||||||
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
|
return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
|
||||||
}
|
}
|
||||||
tempFile.Close()
|
tempFile.Close()
|
||||||
|
|
||||||
|
|||||||
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Create a test GGUF file
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
// Write to temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "test-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name())
|
||||||
|
|
||||||
|
if _, err := tempFile.Write(data); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
|
||||||
|
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
|
||||||
|
|
||||||
|
// Try to parse it
|
||||||
|
fmt.Println("Attempting to parse...")
|
||||||
|
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Parse error: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
|
||||||
|
}
|
||||||
@ -15,12 +15,12 @@ const (
|
|||||||
ggufTypeUint32 = 4
|
ggufTypeUint32 = 4
|
||||||
ggufTypeInt32 = 5
|
ggufTypeInt32 = 5
|
||||||
ggufTypeFloat32 = 6
|
ggufTypeFloat32 = 6
|
||||||
ggufTypeUint64 = 7
|
ggufTypeBool = 7
|
||||||
ggufTypeInt64 = 8
|
ggufTypeString = 8
|
||||||
ggufTypeFloat64 = 9
|
ggufTypeArray = 9
|
||||||
ggufTypeBool = 10
|
ggufTypeUint64 = 10
|
||||||
ggufTypeString = 11
|
ggufTypeInt64 = 11
|
||||||
ggufTypeArray = 12
|
ggufTypeFloat64 = 12
|
||||||
)
|
)
|
||||||
|
|
||||||
// testGGUFBuilder helps build GGUF files for testing
|
// testGGUFBuilder helps build GGUF files for testing
|
||||||
@ -41,7 +41,7 @@ func newTestGGUFBuilder() *testGGUFBuilder {
|
|||||||
return &testGGUFBuilder{
|
return &testGGUFBuilder{
|
||||||
buf: new(bytes.Buffer),
|
buf: new(bytes.Buffer),
|
||||||
version: 3,
|
version: 3,
|
||||||
tensorCount: 100,
|
tensorCount: 0,
|
||||||
kvPairs: []testKVPair{},
|
kvPairs: []testKVPair{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user