mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
Compare commits
No commits in common. "9609ce2b366803548de6d75e93cce0a2e7eff447" and "cdb41b0c766a43ec0d6eea75ebd3a951fa3e0461" have entirely different histories.
9609ce2b36
...
cdb41b0c76
@ -1443,6 +1443,10 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "ModelName is the name of the model (from general.name or filename)"
|
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||||
},
|
},
|
||||||
|
"modelVersion": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||||
|
},
|
||||||
"fileSize": {
|
"fileSize": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||||
|
|||||||
@ -1443,6 +1443,10 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "ModelName is the name of the model (from general.name or filename)"
|
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||||
},
|
},
|
||||||
|
"modelVersion": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||||
|
},
|
||||||
"fileSize": {
|
"fileSize": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp/cmpopts"
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
"github.com/anchore/syft/syft/artifact"
|
"github.com/anchore/syft/syft/artifact"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
@ -13,28 +14,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestGGUFCataloger_Globs(t *testing.T) {
|
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
fixture string
|
|
||||||
expected []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "obtain gguf files",
|
|
||||||
fixture: "test-fixtures/glob-paths",
|
|
||||||
expected: []string{
|
|
||||||
"models/model.gguf",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
pkgtest.NewCatalogTester().
|
|
||||||
FromDirectory(t, test.fixture).
|
|
||||||
ExpectsResolverContentQueries(test.expected).
|
|
||||||
TestCataloger(t, NewGGUFCataloger())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGGUFCataloger_Integration(t *testing.T) {
|
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||||
@ -72,6 +51,7 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
),
|
),
|
||||||
Metadata: pkg.GGUFFileHeader{
|
Metadata: pkg.GGUFFileHeader{
|
||||||
ModelName: "llama3-8b",
|
ModelName: "llama3-8b",
|
||||||
|
ModelVersion: "3.0",
|
||||||
License: "Apache-2.0",
|
License: "Apache-2.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: "Unknown",
|
Quantization: "Unknown",
|
||||||
@ -97,11 +77,16 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
IgnoreLocationLayer().
|
IgnoreLocationLayer().
|
||||||
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||||
WithCompareOptions(
|
WithCompareOptions(
|
||||||
// Ignore MetadataHash as it's computed dynamically
|
// Ignore Hash as it's computed dynamically
|
||||||
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "MetadataHash"),
|
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
|
||||||
)
|
)
|
||||||
|
|
||||||
tester.TestCataloger(t, NewGGUFCataloger())
|
tester.TestCataloger(t, NewGGUFCataloger())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGGUFCataloger_Name(t *testing.T) {
|
||||||
|
cataloger := NewGGUFCataloger()
|
||||||
|
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||||
|
}
|
||||||
|
|||||||
@ -1,14 +1,25 @@
|
|||||||
package ai
|
package ai
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/cespare/xxhash/v2"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
)
|
)
|
||||||
|
|
||||||
func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...file.Location) pkg.Package {
|
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
|
||||||
|
// Compute hash if not already set
|
||||||
|
if metadata.Hash == "" {
|
||||||
|
metadata.Hash = computeMetadataHash(metadata)
|
||||||
|
}
|
||||||
|
|
||||||
p := pkg.Package{
|
p := pkg.Package{
|
||||||
Name: metadata.ModelName,
|
Name: metadata.ModelName,
|
||||||
Version: version,
|
Version: metadata.ModelVersion,
|
||||||
Locations: file.NewLocationSet(locations...),
|
Locations: file.NewLocationSet(locations...),
|
||||||
Type: pkg.ModelPkg,
|
Type: pkg.ModelPkg,
|
||||||
Licenses: pkg.NewLicenseSet(),
|
Licenses: pkg.NewLicenseSet(),
|
||||||
@ -26,3 +37,33 @@ func newGGUFPackage(metadata *pkg.GGUFFileHeader, version string, locations ...f
|
|||||||
|
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||||
|
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
|
||||||
|
// Create a stable representation of the metadata
|
||||||
|
hashData := struct {
|
||||||
|
Format string
|
||||||
|
Name string
|
||||||
|
Version string
|
||||||
|
Architecture string
|
||||||
|
GGUFVersion uint32
|
||||||
|
TensorCount uint64
|
||||||
|
}{
|
||||||
|
Name: metadata.ModelName,
|
||||||
|
Version: metadata.ModelVersion,
|
||||||
|
Architecture: metadata.Architecture,
|
||||||
|
GGUFVersion: metadata.GGUFVersion,
|
||||||
|
TensorCount: metadata.TensorCount,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal to JSON for stable hashing
|
||||||
|
jsonBytes, err := json.Marshal(hashData)
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute xxhash
|
||||||
|
hash := xxhash.Sum64(jsonBytes)
|
||||||
|
return fmt.Sprintf("%016x", hash) // 16 hex chars (64 bits)
|
||||||
|
}
|
||||||
|
|||||||
@ -15,15 +15,14 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
metadata *pkg.GGUFFileHeader
|
metadata *pkg.GGUFFileHeader
|
||||||
version string
|
|
||||||
locations []file.Location
|
locations []file.Location
|
||||||
checkFunc func(t *testing.T, p pkg.Package)
|
checkFunc func(t *testing.T, p pkg.Package)
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "complete GGUF package with all fields",
|
name: "complete GGUF package with all fields",
|
||||||
version: "3.0",
|
|
||||||
metadata: &pkg.GGUFFileHeader{
|
metadata: &pkg.GGUFFileHeader{
|
||||||
ModelName: "llama3-8b-instruct",
|
ModelName: "llama3-8b-instruct",
|
||||||
|
ModelVersion: "3.0",
|
||||||
License: "Apache-2.0",
|
License: "Apache-2.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: "Q4_K_M",
|
Quantization: "Q4_K_M",
|
||||||
@ -53,9 +52,9 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "minimal GGUF package",
|
name: "minimal GGUF package",
|
||||||
version: "1.0",
|
|
||||||
metadata: &pkg.GGUFFileHeader{
|
metadata: &pkg.GGUFFileHeader{
|
||||||
ModelName: "simple-model",
|
ModelName: "simple-model",
|
||||||
|
ModelVersion: "1.0",
|
||||||
Architecture: "gpt2",
|
Architecture: "gpt2",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 50,
|
TensorCount: 50,
|
||||||
@ -77,9 +76,9 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "GGUF package with multiple locations",
|
name: "GGUF package with multiple locations",
|
||||||
version: "1.5",
|
|
||||||
metadata: &pkg.GGUFFileHeader{
|
metadata: &pkg.GGUFFileHeader{
|
||||||
ModelName: "multi-location-model",
|
ModelName: "multi-location-model",
|
||||||
|
ModelVersion: "1.5",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 150,
|
TensorCount: 150,
|
||||||
@ -96,12 +95,12 @@ func TestNewGGUFPackage(t *testing.T) {
|
|||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
p := newGGUFPackage(tt.metadata, tt.version, tt.locations...)
|
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||||
|
|
||||||
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
|
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
|
||||||
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||||
}
|
}
|
||||||
if d := cmp.Diff(tt.version, p.Version); d != "" {
|
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
|
||||||
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||||
}
|
}
|
||||||
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||||
|
|||||||
@ -62,12 +62,10 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
|||||||
// Extract metadata
|
// Extract metadata
|
||||||
metadata := ggufFile.Metadata()
|
metadata := ggufFile.Metadata()
|
||||||
|
|
||||||
// Extract version separately (will be set on Package.Version)
|
|
||||||
modelVersion := extractVersion(ggufFile.Header.MetadataKV)
|
|
||||||
|
|
||||||
// Convert to syft metadata structure
|
// Convert to syft metadata structure
|
||||||
syftMetadata := &pkg.GGUFFileHeader{
|
syftMetadata := &pkg.GGUFFileHeader{
|
||||||
ModelName: metadata.Name,
|
ModelName: metadata.Name,
|
||||||
|
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
|
||||||
License: metadata.License,
|
License: metadata.License,
|
||||||
Architecture: metadata.Architecture,
|
Architecture: metadata.Architecture,
|
||||||
Quantization: metadata.FileTypeDescriptor,
|
Quantization: metadata.FileTypeDescriptor,
|
||||||
@ -86,7 +84,6 @@ func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment,
|
|||||||
// Create package from metadata
|
// Create package from metadata
|
||||||
p := newGGUFPackage(
|
p := newGGUFPackage(
|
||||||
syftMetadata,
|
syftMetadata,
|
||||||
modelVersion,
|
|
||||||
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -10,6 +10,9 @@ type GGUFFileHeader struct {
|
|||||||
// ModelName is the name of the model (from general.name or filename)
|
// ModelName is the name of the model (from general.name or filename)
|
||||||
ModelName string `json:"modelName" cyclonedx:"modelName"`
|
ModelName string `json:"modelName" cyclonedx:"modelName"`
|
||||||
|
|
||||||
|
// ModelVersion is the version of the model (if available in header, else "unknown")
|
||||||
|
ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`
|
||||||
|
|
||||||
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
|
// FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)
|
||||||
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
|
FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user