Compare commits

..

No commits in common. "9609ce2b366803548de6d75e93cce0a2e7eff447" and "cdb41b0c766a43ec0d6eea75ebd3a951fa3e0461" have entirely different histories.

10 changed files with 100 additions and 67 deletions

View File

@ -1443,6 +1443,10 @@
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": {
"type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"

View File

@ -1443,6 +1443,10 @@
"type": "string",
"description": "ModelName is the name of the model (from general.name or filename)"
},
"modelVersion": {
"type": "string",
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
},
"fileSize": {
"type": "integer",
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"

BIN
syft-test Executable file

Binary file not shown.

View File

@ -6,6 +6,7 @@ import (
"testing"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
@ -13,28 +14,6 @@ import (
)
// TestGGUFCataloger_Globs runs the GGUF cataloger against each fixture
// directory and expects the listed resolver content queries.
func TestGGUFCataloger_Globs(t *testing.T) {
	cases := []struct {
		name     string
		fixture  string
		expected []string
	}{
		{
			name:     "obtain gguf files",
			fixture:  "test-fixtures/glob-paths",
			expected: []string{"models/model.gguf"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Build the tester from the fixture first, then attach the expectation and run.
			tester := pkgtest.NewCatalogTester().FromDirectory(t, tc.fixture)
			tester.
				ExpectsResolverContentQueries(tc.expected).
				TestCataloger(t, NewGGUFCataloger())
		})
	}
}
func TestGGUFCataloger_Integration(t *testing.T) {
@ -71,14 +50,15 @@ func TestGGUFCataloger_Integration(t *testing.T) {
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
),
Metadata: pkg.GGUFFileHeader{
ModelName: "llama3-8b",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
ModelName: "llama3-8b",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Unknown",
Parameters: 0,
GGUFVersion: 3,
TensorCount: 0,
Header: map[string]interface{}{},
},
},
},
@ -97,11 +77,16 @@ func TestGGUFCataloger_Integration(t *testing.T) {
IgnoreLocationLayer().
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
WithCompareOptions(
// Ignore MetadataHash as it's computed dynamically
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
// Ignore Hash as it's computed dynamically
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
)
tester.TestCataloger(t, NewGGUFCataloger())
})
}
}
// TestGGUFCataloger_Name verifies the name reported by the GGUF cataloger.
func TestGGUFCataloger_Name(t *testing.T) {
	got := NewGGUFCataloger().Name()
	assert.Equal(t, "gguf-cataloger", got)
}

View File

@ -1,14 +1,25 @@
package ai
import (
"encoding/json"
"fmt"
"github.com/cespare/xxhash/v2"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
// Compute hash if not already set
if metadata.Hash == "" {
metadata.Hash = computeMetadataHash(metadata)
}
p := pkg.Package{
Name: metadata.ModelName,
Version: version,
Version: metadata.ModelVersion,
Locations: file.NewLocationSet(locations...),
Type: pkg.ModelPkg,
Licenses: pkg.NewLicenseSet(),
@ -26,3 +37,33 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...f
return p
}
// computeMetadataHash computes a stable hash of the metadata for use as a
// global identifier. A fixed subset of the header fields is serialised to JSON
// and hashed with xxhash; the 64-bit sum is returned as 16 zero-padded
// lowercase hex characters. If serialisation fails, the failure is logged at
// debug level and an empty string is returned.
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
	// The field names and their order determine the JSON bytes, and therefore
	// the hash, so this layout must stay exactly as it is.
	type hashInput struct {
		Format       string
		Name         string
		Version      string
		Architecture string
		GGUFVersion  uint32
		TensorCount  uint64
	}

	// NOTE(review): Format is never assigned here, so it always serialises as
	// an empty string. It still contributes to the hashed bytes; confirm
	// whether it was meant to carry a value before changing it.
	input := hashInput{
		Name:         metadata.ModelName,
		Version:      metadata.ModelVersion,
		Architecture: metadata.Architecture,
		GGUFVersion:  metadata.GGUFVersion,
		TensorCount:  metadata.TensorCount,
	}

	encoded, err := json.Marshal(input)
	if err != nil {
		log.Debugf("failed to marshal metadata for hashing: %v", err)
		return ""
	}

	// %016x pads to 16 hex characters, the full width of a 64-bit sum.
	return fmt.Sprintf("%016x", xxhash.Sum64(encoded))
}

View File

@ -15,22 +15,21 @@ func TestNewGGUFPackage(t *testing.T) {
tests := []struct {
name string
metadata *pkg.GGUFFileHeader
version string
locations []file.Location
checkFunc func(t *testing.T, p pkg.Package)
}{
{
name: "complete GGUF package with all fields",
version: "3.0",
name: "complete GGUF package with all fields",
metadata: &pkg.GGUFFileHeader{
ModelName: "llama3-8b-instruct",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{},
ModelName: "llama3-8b-instruct",
ModelVersion: "3.0",
License: "Apache-2.0",
Architecture: "llama",
Quantization: "Q4_K_M",
Parameters: 8030000000,
GGUFVersion: 3,
TensorCount: 291,
Header: map[string]any{},
},
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
checkFunc: func(t *testing.T, p pkg.Package) {
@ -52,10 +51,10 @@ func TestNewGGUFPackage(t *testing.T) {
},
},
{
name: "minimal GGUF package",
version: "1.0",
name: "minimal GGUF package",
metadata: &pkg.GGUFFileHeader{
ModelName: "simple-model",
ModelVersion: "1.0",
Architecture: "gpt2",
GGUFVersion: 3,
TensorCount: 50,
@ -76,10 +75,10 @@ func TestNewGGUFPackage(t *testing.T) {
},
},
{
name: "GGUF package with multiple locations",
version: "1.5",
name: "GGUF package with multiple locations",
metadata: &pkg.GGUFFileHeader{
ModelName: "multi-location-model",
ModelVersion: "1.5",
Architecture: "llama",
GGUFVersion: 3,
TensorCount: 150,
@ -96,12 +95,12 @@ func TestNewGGUFPackage(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)
p := newGGUFPackage(tt.metadata, tt.locations...)
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
t.Errorf("Name mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(tt.version, p.Version); d != "" {
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
t.Errorf("Version mismatch (-want +got):\n%s", d)
}
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {

View File

@ -62,17 +62,15 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Extract metadata
metadata := ggufFile.Metadata()
// Extract version separately (will be set on Package.Version)
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
// Convert to syft metadata structure
syftMetadata := &pkg.GGUFFileHeader{
ModelName: metadata.Name,
License: metadata.License,
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
ModelName: metadata.Name,
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
License: metadata.License,
Architecture: metadata.Architecture,
Quantization: metadata.FileTypeDescriptor,
Parameters: uint64(metadata.Parameters),
GGUFVersion: uint32(ggufFile.Header.Version),
TensorCount: ggufFile.Header.TensorCount,
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
MetadataHash: computeKVMetadataHash(ggufFile.Header.MetadataKV),
@ -86,7 +84,6 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
// Create package from metadata
p := newGGUFPackage(
syftMetadata,
modelVersion,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
)

View File

@ -3,7 +3,7 @@ package ai
import (
"fmt"
"os"
gguf_parser "github.com/gpustack/gguf-parser-go"
)
@ -14,21 +14,21 @@ func main() {
withStringKV("general.architecture", "llama").
withStringKV("general.name", "test-model").
build()
// Write to temp file
tempFile, err := os.CreateTemp("", "test-*.gguf")
if err != nil {
panic(err)
}
defer os.Remove(tempFile.Name())
if _, err := tempFile.Write(data); err != nil {
panic(err)
}
tempFile.Close()
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
// Try to parse it
fmt.Println("Attempting to parse...")
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
@ -36,6 +36,6 @@ func main() {
fmt.Printf("Parse error: %v\n", err)
return
}
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
}

View File

@ -10,6 +10,9 @@ type GGUFFileHeader struct {
// ModelName is the name of the model (from general.name or filename)
ModelName string `json:"modelName" cyclonedx:"modelName"`
// ModelVersion is the version of the model (if available in header, else "unknown")
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`