mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
Compare commits
3 Commits
cdb41b0c76
...
9609ce2b36
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9609ce2b36 | ||
|
|
56761cee6f | ||
|
|
b80592f735 |
@ -1443,10 +1443,6 @@
|
||||
"type": "string",
|
||||
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||
},
|
||||
"modelVersion": {
|
||||
"type": "string",
|
||||
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||
},
|
||||
"fileSize": {
|
||||
"type": "integer",
|
||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||
|
||||
@ -1443,10 +1443,6 @@
|
||||
"type": "string",
|
||||
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||
},
|
||||
"modelVersion": {
|
||||
"type": "string",
|
||||
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||
},
|
||||
"fileSize": {
|
||||
"type": "integer",
|
||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||
|
||||
@ -6,7 +6,6 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
@ -14,6 +13,28 @@ import (
|
||||
)
|
||||
|
||||
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
fixture string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "obtain gguf files",
|
||||
fixture: "test-fixtures/glob-paths",
|
||||
expected: []string{
|
||||
"models/model.gguf",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, test.fixture).
|
||||
ExpectsResolverContentQueries(test.expected).
|
||||
TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||
@ -50,15 +71,14 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
ModelName: "llama3-8b",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
ModelName: "llama3-8b",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -77,16 +97,11 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
||||
IgnoreLocationLayer().
|
||||
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||
WithCompareOptions(
|
||||
// Ignore Hash as it's computed dynamically
|
||||
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
|
||||
// Ignore MetadataHash as it's computed dynamically
|
||||
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
|
||||
)
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Name(t *testing.T) {
|
||||
cataloger := NewGGUFCataloger()
|
||||
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||
}
|
||||
|
||||
@ -1,25 +1,14 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
|
||||
// Compute hash if not already set
|
||||
if metadata.Hash == "" {
|
||||
metadata.Hash = computeMetadataHash(metadata)
|
||||
}
|
||||
|
||||
func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
|
||||
p := pkg.Package{
|
||||
Name: metadata.ModelName,
|
||||
Version: metadata.ModelVersion,
|
||||
Version: version,
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
@ -37,33 +26,3 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pk
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
|
||||
// Create a stable representation of the metadata
|
||||
hashData := struct {
|
||||
Format string
|
||||
Name string
|
||||
Version string
|
||||
Architecture string
|
||||
GGUFVersion uint32
|
||||
TensorCount uint64
|
||||
}{
|
||||
Name: metadata.ModelName,
|
||||
Version: metadata.ModelVersion,
|
||||
Architecture: metadata.Architecture,
|
||||
GGUFVersion: metadata.GGUFVersion,
|
||||
TensorCount: metadata.TensorCount,
|
||||
}
|
||||
|
||||
// Marshal to JSON for stable hashing
|
||||
jsonBytes, err := json.Marshal(hashData)
|
||||
if err != nil {
|
||||
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Compute xxhash
|
||||
hash := xxhash.Sum64(jsonBytes)
|
||||
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
|
||||
}
|
||||
|
||||
@ -15,21 +15,22 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
metadata *pkg.GGUFFileHeader
|
||||
version string
|
||||
locations []file.Location
|
||||
checkFunc func(t *testing.T, p pkg.Package)
|
||||
}{
|
||||
{
|
||||
name: "complete GGUF package with all fields",
|
||||
name: "complete GGUF package with all fields",
|
||||
version: "3.0",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelName: "llama3-8b-instruct",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]any{},
|
||||
ModelName: "llama3-8b-instruct",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]any{},
|
||||
},
|
||||
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
@ -51,10 +52,10 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "minimal GGUF package",
|
||||
name: "minimal GGUF package",
|
||||
version: "1.0",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelName: "simple-model",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "gpt2",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 50,
|
||||
@ -75,10 +76,10 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "GGUF package with multiple locations",
|
||||
name: "GGUF package with multiple locations",
|
||||
version: "1.5",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelName: "multi-location-model",
|
||||
ModelVersion: "1.5",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
@ -95,12 +96,12 @@ func TestNewGGUFPackage(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||
p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)
|
||||
|
||||
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
|
||||
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
|
||||
if d := cmp.Diff(tt.version, p.Version); d != "" {
|
||||
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||
|
||||
@ -62,15 +62,17 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
||||
// Extract metadata
|
||||
metadata := ggufFile.Metadata()
|
||||
|
||||
// Extract version separately (will be set on Package.Version)
|
||||
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
|
||||
|
||||
// Convert to syft metadata structure
|
||||
syftMetadata := &pkg.GGUFFileHeader{
|
||||
ModelName: metadata.Name,
|
||||
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
|
||||
License: metadata.License,
|
||||
Architecture: metadata.Architecture,
|
||||
Quantization: metadata.FileTypeDescriptor,
|
||||
Parameters: uint64(metadata.Parameters),
|
||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||
ModelName: metadata.Name,
|
||||
License: metadata.License,
|
||||
Architecture: metadata.Architecture,
|
||||
Quantization: metadata.FileTypeDescriptor,
|
||||
Parameters: uint64(metadata.Parameters),
|
||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||
TensorCount: ggufFile.Header.TensorCount,
|
||||
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||
MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
|
||||
@ -84,6 +86,7 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
||||
// Create package from metadata
|
||||
p := newGGUFPackage(
|
||||
syftMetadata,
|
||||
modelVersion,
|
||||
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||
)
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ package ai
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
@ -14,21 +14,21 @@ func main() {
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
|
||||
|
||||
// Write to temp file
|
||||
tempFile, err := os.CreateTemp("", "test-*.gguf")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer os.Remove(tempFile.Name())
|
||||
|
||||
|
||||
if _, err := tempFile.Write(data); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
tempFile.Close()
|
||||
|
||||
|
||||
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
|
||||
|
||||
|
||||
// Try to parse it
|
||||
fmt.Println("Attempting to parse...")
|
||||
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
|
||||
@ -36,6 +36,6 @@ func main() {
|
||||
fmt.Printf("Parse error: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
|
||||
}
|
||||
|
||||
@ -10,9 +10,6 @@ type GGUFFileHeader struct {
|
||||
// ModelName is the name of the model (from general.name or filename)
|
||||
ModelName string `json:"modelName" cyclonedx:"modelName"`
|
||||
|
||||
// ModelVersion is the version of the model (if available in header, else "unknown")
|
||||
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
|
||||
|
||||
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
|
||||
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user